diff --git a/evaluation/README.md b/evaluation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d461eeb02d148fb57489e2cfcce09bf0a8ebf721
--- /dev/null
+++ b/evaluation/README.md
@@ -0,0 +1,66 @@
+This project, soma-evaluation, is an open-source automated testing platform: users specify a test template and the type of agent to be tested, and the platform runs the performance test.
+
+## Features
+- Receives page request parameters, including the test template name (test scenario) and the type of agent under test
+- Builds the test environment for the scenario and runs the test cases
+- Collects resource consumption and service metrics of the environment, the business workload, and the agent
+- Aggregates the results and outputs them
+
+![alt text](doc/images/image-2.png)
+
+## Architecture
+evaluation consists mainly of an evaluation process and runner processes. The evaluation process receives web API requests, schedules runners, and aggregates and outputs test results. A runner process executes test cases: it initializes the test environment according to the parameters, runs the specified test cases, and saves the results.
+
+![alt text](doc/images/image-0.png)
+
+## Quick start
+ - Download the [eval-controller.yaml](./eval-charts/evaluation-controller/eval-controller.yaml) file
+ - Run the helm commands
+ ```
+ helm repo add evaluation https://df-evaluation.oss-cn-beijing.aliyuncs.com/chart
+ helm repo update
+ helm install evaluation evaluation/evaluation -n evaluation -f eval-controller.yaml
+ ```
+
+## Contributing
+TODO
+
+## Call flow
+The web page sends a test request to the Api-Server, the Server forwards it to the Manager, and the Manager creates Runner child processes to execute the cases; after the test finishes, the Manager aggregates the results and outputs them.
+
+![alt text](doc/images/image-1.png)
+
+## Code layout
+### 1. manager
+Scheduling process
+- Creates test runners
+- Monitors test status and outputs logs
+- Aggregates and outputs test results
+- Receives and queues test requests
### 2. Runner
+Test-case execution process
+#### 2.1 agent-tools
+Scheduler classes for the various collectors; the tools for each collector must implement the following interface
+
+[Base class to implement](https://github.com/deepflowio/df-evaluation/blob/main/eval-runner/eval-runner/agent_tools/base.py)
+
+- Create, delete
+- Start, stop
+- Health check
+- Write configuration
+- ...
+
+#### 2.2 cases
+The concrete flow of each test case; the following scenarios are implemented:
+- Performance before and after deploying the agent in an extremely high-performance business scenario (nginx-default-page)
+- Performance before and after deploying the agent in a typical cloud-native microservice scenario (istio-bookinfo-demo)
+- ...
+#### 2.3 platform-tools
+- Platform sdk base; each cloud platform must implement this base-class interface
+  - Create, delete virtual machines
+  - Start, stop virtual machines
+  - Get IP
+  - Get status
+- Aliyun sdk
### 3. Api-server
+Http server that receives page requests
diff --git a/evaluation/doc/images/image-0.png b/evaluation/doc/images/image-0.png
new file mode 100644
index 0000000000000000000000000000000000000000..e093acd6cb167f48e8039af9d486f848219d5b6c
Binary files /dev/null and b/evaluation/doc/images/image-0.png differ
diff --git a/evaluation/doc/images/image-1.png b/evaluation/doc/images/image-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..97d7125dc154d9cb4add8d2bbf471ebe7f11226f
Binary files /dev/null and b/evaluation/doc/images/image-1.png differ
diff --git a/evaluation/doc/images/image-2.png b/evaluation/doc/images/image-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..67c8dac75f3adcbe2a0f7b78500926790f14b5e9
Binary files /dev/null and b/evaluation/doc/images/image-2.png differ
diff --git a/evaluation/eval-bench/Dockerfile b/evaluation/eval-bench/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/evaluation/eval-bench/Makefile b/evaluation/eval-bench/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..a99985c6ed209b92a489488398146d45ea9589b4
--- /dev/null
+++ b/evaluation/eval-bench/Makefile
@@ -0,0 +1,16 @@
+
+.PHONY: all
+all: eb-amd64
+
+.PHONY: eb-amd64
+eb-amd64:
+	CGO_ENABLED=0 GOOS=linux go build -o bin/x86_64/eb -a -ldflags '-extldflags "-static"' cmd/eb/main.go
+
+.PHONY: eb-arm64
+eb-arm64:
+	CGO_ENABLED=1 GOOS=linux GOARCH=arm CC=arm-linux-gnueabi-gcc go build -o bin/arm_64/eb -a -ldflags '-extldflags "-static"' cmd/eb/main.go
+
+
+.PHONY: clean
+clean:
+	rm -rf bin
\ No newline at end of file
diff --git a/evaluation/eval-bench/README.md b/evaluation/eval-bench/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..91dec3bbe98c01061586e1386f19e857c30c5f8a
--- /dev/null
+++
b/evaluation/eval-bench/README.md @@ -0,0 +1,50 @@ +# Build + +go build + +# Run + +``` +Usage of ./eb: + -h string + Target Service "host:port" + -p int + DB Root Password + -r int + request per second + -d int + Execution time in seconds + -t int + Number of threads + -e string + Engine of DB [redis, mysql, mongo] + -c int + concurrent connections of each thread, only support mysql + -complexity int + complexity of query sql, only add count of select key + -method string + method of query, redis:[GET, SET] + -sql string + customizable sql of query, only support mysql +``` + +- Example: +``` +[root@iZ2zebdukco9jfpuo0shnaZ ~]# ./eb -e redis -h 127.0.0.1:6379 -p deepflow -r 50000 -t 5 -d 10 +2024/01/29 13:57:24 [*] Start redis App Traffic 127.0.0.1:6379, date rate 10000 rps. +2024/01/29 13:57:24 [*] Start redis App Traffic 127.0.0.1:6379, date rate 10000 rps. +2024/01/29 13:57:24 [*] Start redis App Traffic 127.0.0.1:6379, date rate 10000 rps. +2024/01/29 13:57:24 [*] Start redis App Traffic 127.0.0.1:6379, date rate 10000 rps. +2024/01/29 13:57:24 [*] Start redis App Traffic 127.0.0.1:6379, date rate 10000 rps. +now request count is 50020 , err is 0, cost time 1.001950s +now request count is 100021 , err is 0, cost time 2.001982s +now request count is 150068 , err is 0, cost time 3.002013s +now request count is 200076 , err is 0, cost time 4.002049s +now request count is 250076 , err is 0, cost time 5.002072s +now request count is 300076 , err is 0, cost time 6.002102s +now request count is 350040 , err is 0, cost time 7.002169s +now request count is 400082 , err is 0, cost time 8.002204s +now request count is 450070 , err is 0, cost time 9.002954s +now request count is 500071 , err is 0, cost time 10.002993s +total: 500395, count: 500395, error: 0, request/sec: 50024.43 avg: 39.512µs max: 3.845947ms p50: 34.687µs p90: 62.392µs +``` \ No newline at end of file diff --git a/evaluation/eval-bench/client/client.go b/evaluation/eval-bench/client/client.go new file mode 100644 index 0000000000000000000000000000000000000000..71a0d2bd52d32c5211f2a9c01151ca356cef3454 --- /dev/null +++ b/evaluation/eval-bench/client/client.go @@ -0,0 +1,9 @@ +package client + +type EngineClient interface { + Exec() error + InitClient() + Close() + IsReady() bool + Property() +} diff --git a/evaluation/eval-bench/client/grpc/grpc-client.go b/evaluation/eval-bench/client/grpc/grpc-client.go new file mode 100644 index 0000000000000000000000000000000000000000..4502a69bc0d1eb35ae7247fd1cb98b851554caac --- /dev/null +++ b/evaluation/eval-bench/client/grpc/grpc-client.go @@ -0,0 +1,68 @@ +package grpc + +import ( + "context" + "log" + "time" + + pb "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/grpc/pb" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +//go:generate mkdir ./pb +//go:generate protoc --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative pb.proto +type GrpcClient struct { + isReady bool + + LatencyChan chan *time.Duration + ErrLatencyChan chan *time.Duration + + Addr string + Client pb.GreeterClient + Conn *grpc.ClientConn +} + +func (gc *GrpcClient) Property() { + log.Printf("GrpcClient Property:") + log.Printf("Addr: %s", gc.Addr) +} + +func (gc *GrpcClient) InitClient() { + conn, err := grpc.Dial(gc.Addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + log.Fatalf("connect failed: %v", err) + } + gc.Conn = conn + gc.Client = pb.NewGreeterClient(conn) + gc.isReady = true +} + +func (gc 
*GrpcClient) IsReady() bool { + return gc.isReady +} + +func (gc *GrpcClient) Close() { + if gc.Client != nil { + gc.Conn.Close() + } +} + +func (gc *GrpcClient) Exec() error { + start := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + _, err := gc.Client.SayHello(ctx, &pb.HelloRequest{ + Name1: "hello", Name2: "hello", Name3: "hello", Name4: "hello", + Name5: "hello", Name6: "hello", Name7: "hello", Name8: "hello", + Name9: "hello", Name10: "hello", + }) + latency := time.Since(start) + if err != nil { + gc.ErrLatencyChan <- &latency + log.Printf("unable to send message: %v", err) + } else { + gc.LatencyChan <- &latency + } + return err +} diff --git a/evaluation/eval-bench/client/grpc/pb.proto b/evaluation/eval-bench/client/grpc/pb.proto new file mode 100644 index 0000000000000000000000000000000000000000..d7fe67808d3f9e36cb01b6cf01b143ed164065ef --- /dev/null +++ b/evaluation/eval-bench/client/grpc/pb.proto @@ -0,0 +1,38 @@ +syntax = "proto3"; + +option go_package = "./pb"; +package pb; + +// The greeting service definition. +service Greeter { + // Sends a greeting + rpc SayHello (HelloRequest) returns (HelloReply) {} +} + +// The request message containing the user's name. +message HelloRequest { + string name1 = 1; + string name2 = 2; + string name3 = 3; + string name4 = 4; + string name5 = 5; + string name6 = 6; + string name7 = 7; + string name8 = 8; + string name9 = 9; + string name10 = 10; +} + +// The response message containing the greetings +message HelloReply { + string message1 = 1; + string message2 = 2; + string message3 = 3; + string message4 = 4; + string message5 = 5; + string message6 = 6; + string message7 = 7; + string message8 = 8; + string message9 = 9; + string message10 = 10; +} diff --git a/evaluation/eval-bench/client/grpc/pb/pb.pb.go b/evaluation/eval-bench/client/grpc/pb/pb.pb.go new file mode 100644 index 0000000000000000000000000000000000000000..c9b978986c5f58ce6c26ab36b53732f6144b8a94 --- /dev/null +++ b/evaluation/eval-bench/client/grpc/pb/pb.pb.go @@ -0,0 +1,384 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.32.0 +// protoc v3.12.4 +// source: pb.proto + +package pb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// The request message containing the user's name. 
+type HelloRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name1 string `protobuf:"bytes,1,opt,name=name1,proto3" json:"name1,omitempty"` + Name2 string `protobuf:"bytes,2,opt,name=name2,proto3" json:"name2,omitempty"` + Name3 string `protobuf:"bytes,3,opt,name=name3,proto3" json:"name3,omitempty"` + Name4 string `protobuf:"bytes,4,opt,name=name4,proto3" json:"name4,omitempty"` + Name5 string `protobuf:"bytes,5,opt,name=name5,proto3" json:"name5,omitempty"` + Name6 string `protobuf:"bytes,6,opt,name=name6,proto3" json:"name6,omitempty"` + Name7 string `protobuf:"bytes,7,opt,name=name7,proto3" json:"name7,omitempty"` + Name8 string `protobuf:"bytes,8,opt,name=name8,proto3" json:"name8,omitempty"` + Name9 string `protobuf:"bytes,9,opt,name=name9,proto3" json:"name9,omitempty"` + Name10 string `protobuf:"bytes,10,opt,name=name10,proto3" json:"name10,omitempty"` +} + +func (x *HelloRequest) Reset() { + *x = HelloRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pb_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *HelloRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*HelloRequest) ProtoMessage() {} + +func (x *HelloRequest) ProtoReflect() protoreflect.Message { + mi := &file_pb_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use HelloRequest.ProtoReflect.Descriptor instead. +func (*HelloRequest) Descriptor() ([]byte, []int) { + return file_pb_proto_rawDescGZIP(), []int{0} +} + +func (x *HelloRequest) GetName1() string { + if x != nil { + return x.Name1 + } + return "" +} + +func (x *HelloRequest) GetName2() string { + if x != nil { + return x.Name2 + } + return "" +} + +func (x *HelloRequest) GetName3() string { + if x != nil { + return x.Name3 + } + return "" +} + +func (x *HelloRequest) GetName4() string { + if x != nil { + return x.Name4 + } + return "" +} + +func (x *HelloRequest) GetName5() string { + if x != nil { + return x.Name5 + } + return "" +} + +func (x *HelloRequest) GetName6() string { + if x != nil { + return x.Name6 + } + return "" +} + +func (x *HelloRequest) GetName7() string { + if x != nil { + return x.Name7 + } + return "" +} + +func (x *HelloRequest) GetName8() string { + if x != nil { + return x.Name8 + } + return "" +} + +func (x *HelloRequest) GetName9() string { + if x != nil { + return x.Name9 + } + return "" +} + +func (x *HelloRequest) GetName10() string { + if x != nil { + return x.Name10 + } + return "" +} + +// The response message containing the greetings +type HelloReply struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Message1 string `protobuf:"bytes,1,opt,name=message1,proto3" json:"message1,omitempty"` + Message2 string `protobuf:"bytes,2,opt,name=message2,proto3" json:"message2,omitempty"` + Message3 string `protobuf:"bytes,3,opt,name=message3,proto3" json:"message3,omitempty"` + Message4 string `protobuf:"bytes,4,opt,name=message4,proto3" json:"message4,omitempty"` + Message5 string `protobuf:"bytes,5,opt,name=message5,proto3" json:"message5,omitempty"` + Message6 string `protobuf:"bytes,6,opt,name=message6,proto3" json:"message6,omitempty"` + Message7 string `protobuf:"bytes,7,opt,name=message7,proto3" 
json:"message7,omitempty"` + Message8 string `protobuf:"bytes,8,opt,name=message8,proto3" json:"message8,omitempty"` + Message9 string `protobuf:"bytes,9,opt,name=message9,proto3" json:"message9,omitempty"` + Message10 string `protobuf:"bytes,10,opt,name=message10,proto3" json:"message10,omitempty"` +} + +func (x *HelloReply) Reset() { + *x = HelloReply{} + if protoimpl.UnsafeEnabled { + mi := &file_pb_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *HelloReply) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*HelloReply) ProtoMessage() {} + +func (x *HelloReply) ProtoReflect() protoreflect.Message { + mi := &file_pb_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use HelloReply.ProtoReflect.Descriptor instead. +func (*HelloReply) Descriptor() ([]byte, []int) { + return file_pb_proto_rawDescGZIP(), []int{1} +} + +func (x *HelloReply) GetMessage1() string { + if x != nil { + return x.Message1 + } + return "" +} + +func (x *HelloReply) GetMessage2() string { + if x != nil { + return x.Message2 + } + return "" +} + +func (x *HelloReply) GetMessage3() string { + if x != nil { + return x.Message3 + } + return "" +} + +func (x *HelloReply) GetMessage4() string { + if x != nil { + return x.Message4 + } + return "" +} + +func (x *HelloReply) GetMessage5() string { + if x != nil { + return x.Message5 + } + return "" +} + +func (x *HelloReply) GetMessage6() string { + if x != nil { + return x.Message6 + } + return "" +} + +func (x *HelloReply) GetMessage7() string { + if x != nil { + return x.Message7 + } + return "" +} + +func (x *HelloReply) GetMessage8() string { + if x != nil { + return x.Message8 + } + return "" +} + +func (x *HelloReply) GetMessage9() string { + if x != nil { + return x.Message9 + } + return "" +} + +func (x *HelloReply) GetMessage10() string { + if x != nil { + return x.Message10 + } + return "" +} + +var File_pb_proto protoreflect.FileDescriptor + +var file_pb_proto_rawDesc = []byte{ + 0x0a, 0x08, 0x70, 0x62, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70, 0x62, 0x22, 0xec, + 0x01, 0x0a, 0x0c, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x31, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x6e, 0x61, 0x6d, 0x65, 0x31, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x32, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x32, 0x12, 0x14, 0x0a, 0x05, 0x6e, + 0x61, 0x6d, 0x65, 0x33, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, + 0x33, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x34, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x34, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x35, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x35, 0x12, 0x14, 0x0a, + 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x36, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, + 0x6d, 0x65, 0x36, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x37, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x37, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, + 0x65, 0x38, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x38, 0x12, + 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x39, 0x18, 
0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x6e, 0x61, 0x6d, 0x65, 0x39, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x61, 0x6d, 0x65, 0x31, 0x30, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x61, 0x6d, 0x65, 0x31, 0x30, 0x22, 0xa6, 0x02, + 0x0a, 0x0a, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x1a, 0x0a, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x31, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x31, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x32, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x32, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x33, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x33, + 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x34, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x34, 0x12, 0x1a, 0x0a, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x35, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x35, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x36, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x36, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x37, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x37, + 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x38, 0x18, 0x08, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x38, 0x12, 0x1a, 0x0a, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x39, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x39, 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x31, 0x30, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x31, 0x30, 0x32, 0x39, 0x0a, 0x07, 0x47, 0x72, 0x65, 0x65, 0x74, 0x65, + 0x72, 0x12, 0x2e, 0x0a, 0x08, 0x53, 0x61, 0x79, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x12, 0x10, 0x2e, + 0x70, 0x62, 0x2e, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x0e, 0x2e, 0x70, 0x62, 0x2e, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, + 0x00, 0x42, 0x06, 0x5a, 0x04, 0x2e, 0x2f, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, +} + +var ( + file_pb_proto_rawDescOnce sync.Once + file_pb_proto_rawDescData = file_pb_proto_rawDesc +) + +func file_pb_proto_rawDescGZIP() []byte { + file_pb_proto_rawDescOnce.Do(func() { + file_pb_proto_rawDescData = protoimpl.X.CompressGZIP(file_pb_proto_rawDescData) + }) + return file_pb_proto_rawDescData +} + +var file_pb_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_pb_proto_goTypes = []interface{}{ + (*HelloRequest)(nil), // 0: pb.HelloRequest + (*HelloReply)(nil), // 1: pb.HelloReply +} +var file_pb_proto_depIdxs = []int32{ + 0, // 0: pb.Greeter.SayHello:input_type -> pb.HelloRequest + 1, // 1: pb.Greeter.SayHello:output_type -> pb.HelloReply + 1, // [1:2] is the sub-list for method output_type + 0, // [0:1] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_pb_proto_init() } +func file_pb_proto_init() { + if File_pb_proto != nil { + return + } + if 
!protoimpl.UnsafeEnabled { + file_pb_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*HelloRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pb_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*HelloReply); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pb_proto_rawDesc, + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_pb_proto_goTypes, + DependencyIndexes: file_pb_proto_depIdxs, + MessageInfos: file_pb_proto_msgTypes, + }.Build() + File_pb_proto = out.File + file_pb_proto_rawDesc = nil + file_pb_proto_goTypes = nil + file_pb_proto_depIdxs = nil +} diff --git a/evaluation/eval-bench/client/grpc/pb/pb_grpc.pb.go b/evaluation/eval-bench/client/grpc/pb/pb_grpc.pb.go new file mode 100644 index 0000000000000000000000000000000000000000..89c2343c19ba301194639577efa82c3648c2624c --- /dev/null +++ b/evaluation/eval-bench/client/grpc/pb/pb_grpc.pb.go @@ -0,0 +1,111 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.3.0 +// - protoc v3.12.4 +// source: pb.proto + +package pb + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +const ( + Greeter_SayHello_FullMethodName = "/pb.Greeter/SayHello" +) + +// GreeterClient is the client API for Greeter service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type GreeterClient interface { + // Sends a greeting + SayHello(ctx context.Context, in *HelloRequest, opts ...grpc.CallOption) (*HelloReply, error) +} + +type greeterClient struct { + cc grpc.ClientConnInterface +} + +func NewGreeterClient(cc grpc.ClientConnInterface) GreeterClient { + return &greeterClient{cc} +} + +func (c *greeterClient) SayHello(ctx context.Context, in *HelloRequest, opts ...grpc.CallOption) (*HelloReply, error) { + out := new(HelloReply) + err := c.cc.Invoke(ctx, Greeter_SayHello_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// GreeterServer is the server API for Greeter service. +// All implementations must embed UnimplementedGreeterServer +// for forward compatibility +type GreeterServer interface { + // Sends a greeting + SayHello(context.Context, *HelloRequest) (*HelloReply, error) + mustEmbedUnimplementedGreeterServer() +} + +// UnimplementedGreeterServer must be embedded to have forward compatible implementations. 
+type UnimplementedGreeterServer struct { +} + +func (UnimplementedGreeterServer) SayHello(context.Context, *HelloRequest) (*HelloReply, error) { + return nil, status.Errorf(codes.Unimplemented, "method SayHello not implemented") +} +func (UnimplementedGreeterServer) mustEmbedUnimplementedGreeterServer() {} + +// UnsafeGreeterServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to GreeterServer will +// result in compilation errors. +type UnsafeGreeterServer interface { + mustEmbedUnimplementedGreeterServer() +} + +func RegisterGreeterServer(s grpc.ServiceRegistrar, srv GreeterServer) { + s.RegisterService(&Greeter_ServiceDesc, srv) +} + +func _Greeter_SayHello_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(HelloRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(GreeterServer).SayHello(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Greeter_SayHello_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(GreeterServer).SayHello(ctx, req.(*HelloRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// Greeter_ServiceDesc is the grpc.ServiceDesc for Greeter service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var Greeter_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "pb.Greeter", + HandlerType: (*GreeterServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "SayHello", + Handler: _Greeter_SayHello_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "pb.proto", +} diff --git a/evaluation/eval-bench/client/http/http-client.go b/evaluation/eval-bench/client/http/http-client.go new file mode 100644 index 0000000000000000000000000000000000000000..d51d81cc39a30e7d4dc5495211cd1f0d54ad2eb6 --- /dev/null +++ b/evaluation/eval-bench/client/http/http-client.go @@ -0,0 +1,179 @@ +package http + +import ( + "bytes" + "crypto/tls" + "fmt" + "io" + "log" + "net" + "net/http" + "time" + + uuid "github.com/satori/go.uuid" + + "golang.org/x/net/http2" +) + +type HttpClient struct { + isReady bool + req *http.Request + reqBody io.ReadCloser + + LatencyChan chan *time.Duration + ErrLatencyChan chan *time.Duration + + Addr string + Client *http.Client + Method string + Complexity int + DataSize int + KeepAlive bool + TLS bool + H2C bool +} + +func (hc *HttpClient) Property() { + log.Println("HttpClient Property:") + log.Printf("Addr: %s\n", hc.Addr) + log.Printf("Method: %s\n", hc.Method) + log.Printf("Complexity: %d\n", hc.Complexity) + log.Printf("DataSize: %d\n", hc.DataSize) + log.Printf("KeepAlive: %t\n", hc.KeepAlive) + log.Printf("TLS: %t\n", hc.TLS) + log.Printf("H2C: %t\n", hc.H2C) +} + +// InitClient 初始化HttpClient。 +// 此函数配置HTTP请求的方法、客户端类型、请求体大小、请求头等,并准备执行HTTP请求。 +// 对于不同的配置,会创建相应的http.Client实例以支持HTTP/2连接或TLS连接。 +// 在请求完成后,根据是否保持连接的设置,可能打印响应状态码和响应体长度。 +func (hc *HttpClient) InitClient() { + var err error + if hc.Method == "" { + hc.Method = "GET" // 默认请求方法为GET + } + + disableKeepAlives := false + if !hc.KeepAlive { + disableKeepAlives = true + } + + if hc.H2C { + // 使用HTTP/2客户端,跳过TLS握手 + hc.Client = &http.Client{ + Transport: &http2.Transport{ + AllowHTTP: true, + DialTLS: func(network, addr string, cfg *tls.Config) (net.Conn, error) { + // 直接建立TCP连接,不进行TLS握手 + return 
net.Dial(network, addr)
+				},
+			},
+		}
+	} else {
+		if hc.TLS {
+			// Use a TLS-capable HTTP client and skip certificate verification
+			hc.Client = &http.Client{
+				Transport: &http.Transport{
+					TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+					DisableKeepAlives: disableKeepAlives,
+					DialContext: (&net.Dialer{
+						Timeout: 30 * time.Second, // connection timeout
+						KeepAlive: 60 * time.Second, // keep-alive period
+					}).DialContext, // dialer settings
+					MaxIdleConns: 500, // maximum number of idle connections
+					IdleConnTimeout: 60 * time.Second, // idle connection timeout
+					ExpectContinueTimeout: 30 * time.Second, // timeout waiting for the server's first response
+					MaxIdleConnsPerHost: 100, // idle connections kept per host
+				},
+			}
+		} else {
+			// Use the default HTTP client
+			hc.Client = &http.Client{
+				Transport: &http.Transport{
+					DisableKeepAlives: disableKeepAlives, // toggle keep-alive
+					DialContext: (&net.Dialer{
+						Timeout: 30 * time.Second, // connection timeout
+						KeepAlive: 60 * time.Second, // keep-alive period
+					}).DialContext, // dialer settings
+					MaxIdleConns: 500, // maximum number of idle connections
+					IdleConnTimeout: 60 * time.Second, // idle connection timeout
+					ExpectContinueTimeout: 30 * time.Second, // timeout waiting for the server's first response
+					MaxIdleConnsPerHost: 100, // idle connections kept per host
+				},
+			}
+		}
+	}
+
+	// Build the request body; its size is set by hc.DataSize
+	hc.reqBody = io.NopCloser(bytes.NewReader(bytes.Repeat([]byte("A"), hc.DataSize)))
+
+	// Build the HTTP request
+	hc.req, err = http.NewRequest(hc.Method, hc.Addr, hc.reqBody)
+	if err != nil {
+		// abort if the request cannot be built
+		log.Fatal(fmt.Errorf("error making request: %v", err))
+	}
+
+	// Set request headers according to Complexity
+	for i := 0; i < hc.Complexity; i++ {
+		hc.req.Header.Set(fmt.Sprintf("token%d", i), uuid.NewV1().String())
+	}
+
+	hc.req.ContentLength = int64(hc.DataSize) // set the request body length
+	if hc.KeepAlive {
+		// Send one request up front and handle the response
+		resp, err := hc.Client.Do(hc.req)
+		if err != nil {
+			// abort if the request fails
+			log.Fatal(fmt.Errorf("error do request: %v", err))
+		}
+		defer resp.Body.Close() // make sure the response body is closed
+		body, err := io.ReadAll(resp.Body)
+		if err != nil {
+			// abort if the response body cannot be read
+			log.Fatalf("Read Response Error: %s", err)
+		}
+		// print the response status code and body length
+		fmt.Printf("Get Response %d length: %d\n", resp.StatusCode, len(body))
+	}
+	hc.isReady = true // mark the client as ready
+}
+
+func (hc *HttpClient) IsReady() bool {
+	return hc.isReady
+}
+
+func (hc *HttpClient) Exec() error {
+	hc.Get()
+	return nil
+}
+
+func (hc *HttpClient) Get() {
+	// set headers by Complexity
+	req, _ := http.NewRequest(hc.Method, hc.Addr, hc.reqBody)
+	for i := 0; i < hc.Complexity; i++ {
+		newUuid := uuid.NewV1().String()
+		req.Header.Set(fmt.Sprintf("token%s", newUuid), newUuid)
+	}
+	start := time.Now()
+	resp, err := hc.Client.Do(req)
+	latency := time.Since(start)
+	if err != nil {
+		hc.ErrLatencyChan <- &latency
+		fmt.Println("query error:", err)
+		return
+	}
+	hc.LatencyChan <- &latency
+	// drain and close the body so the connection can be reused
+	io.ReadAll(resp.Body)
+	resp.Body.Close()
+}
+
+func (hc *HttpClient) Close() {
+	if hc.Client != nil {
+		hc.Client.CloseIdleConnections()
+	}
+}
diff --git a/evaluation/eval-bench/client/kafka/kafka-client.go b/evaluation/eval-bench/client/kafka/kafka-client.go
new file mode 100644
index 0000000000000000000000000000000000000000..505d02931faea5327c19400c95e7d2ab519581bd
--- /dev/null
+++ b/evaluation/eval-bench/client/kafka/kafka-client.go
@@ -0,0 +1,128 @@
+package kafka
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+	"time"
+
+	"github.com/IBM/sarama"
+)
+
+type KafkaClient struct {
+	isReady bool
+
+	LatencyChan chan *time.Duration
+	ErrLatencyChan chan *time.Duration
+
+	Addrs []string
+	Topic string
+	Group string
+	producer sarama.SyncProducer
+	consumerGroup sarama.ConsumerGroup
+	config *sarama.Config
+}
+
+func (kc *KafkaClient) Property() {
+	fmt.Printf("KafkaClient Property: \n")
+	fmt.Printf("KafkaClient.Addrs: %v\n", kc.Addrs)
+	fmt.Printf("KafkaClient.Topic: %v\n", kc.Topic)
+	fmt.Printf("KafkaClient.Group: %v\n", kc.Group)
+}
+
+func (kc *KafkaClient) InitClient() {
+	// Configuration
+	kc.config = sarama.NewConfig()
+	kc.config.Consumer.Return.Errors = true
+	kc.config.Producer.Return.Successes = true
+
+	var err error
+	// Create a new sync producer
+	kc.producer, err = sarama.NewSyncProducer(kc.Addrs, kc.config)
+	if err != nil {
+		fmt.Printf("Error creating kafka producer: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Create a new consumer group
+	kc.consumerGroup, err = sarama.NewConsumerGroup(kc.Addrs, kc.Group, kc.config)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Consume messages in the background
+	go kc.consumeMessages(&ConsumerHandler{})
+	kc.isReady = true
+}
+
+func (kc *KafkaClient) IsReady() bool {
+	return kc.isReady
+}
+
+func (kc *KafkaClient) Exec() error {
+	return kc.Get()
+}
+
+// Get produces a single message and records its latency
+func (kc *KafkaClient) Get() error {
+	message := &sarama.ProducerMessage{
+		Topic: kc.Topic,
+		Key:   sarama.StringEncoder("key"),
+		Value: sarama.StringEncoder(fmt.Sprintf("Hello Kafka at %s", time.Now().Format(time.Stamp))),
+	}
+	start := time.Now()
+	_, _, err := kc.producer.SendMessage(message)
+	latency := time.Since(start)
+	if err != nil {
+		kc.ErrLatencyChan <- &latency
+		log.Printf("Error: %s\n", err)
+	} else {
+		kc.LatencyChan <- &latency
+	}
+	return err
+}
+
+func (kc *KafkaClient) Close() {
+	if kc.producer != nil {
+		kc.producer.Close()
+	}
+	if kc.consumerGroup != nil {
+		kc.consumerGroup.Close()
+	}
+}
+
+// ConsumerHandler is a simple implementation of sarama.ConsumerGroupHandler
+type ConsumerHandler struct{}
+
+func (h *ConsumerHandler) Setup(sarama.ConsumerGroupSession) error { return nil }
+func (h *ConsumerHandler) Cleanup(sarama.ConsumerGroupSession) error { return nil }
+func (h *ConsumerHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
+	for message := range claim.Messages() {
+		fmt.Printf("Received message: Topic=%s, Partition=%d, Offset=%d, Key=%s, Value=%s\n",
+			message.Topic, message.Partition, message.Offset, string(message.Key), string(message.Value))
+		session.MarkMessage(message, "")
+	}
+	return nil
+}
+
+func (kc *KafkaClient) consumeMessages(consumerHandler *ConsumerHandler) {
+	// Handle errors reported by the consumer group
+	go func() {
+		for err := range kc.consumerGroup.Errors() {
+			log.Printf("Error: %s\n", err)
+		}
+	}()
+	// Consume messages until the group is closed
+	for {
+		err := kc.consumerGroup.Consume(context.Background(), []string{kc.Topic}, consumerHandler)
+		if err != nil {
+			log.Printf("Error: %s\n", err)
+			return
+		}
+	}
+}
diff --git a/evaluation/eval-bench/client/mongo/mongo-client.go b/evaluation/eval-bench/client/mongo/mongo-client.go
new file mode 100644
index 0000000000000000000000000000000000000000..9f3558cdc59b033ead46f851f77e824cc1cf6206
--- /dev/null
+++ b/evaluation/eval-bench/client/mongo/mongo-client.go
@@ -0,0 +1,92 @@
+package mongo
+
+import (
+	"fmt"
+	"log"
+	"time"
+
+	"gitlab.yunshan.net/yunshan/evaluation/eval-bench/common"
+	"gopkg.in/mgo.v2"
+	"gopkg.in/mgo.v2/bson"
+)
+
+type MongoClient struct {
+	isReady bool
+	collection *mgo.Collection
+
+	LatencyChan chan *time.Duration
+	ErrLatencyChan chan *time.Duration
+
+	Addr string
+	Password string
+	DB string
+	Client *mgo.Session
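+	// Complexity is the number of string fields seeded into the test document
+	// that Get() later reads back with a full collection scan.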
Complexity int +} + +func (mc *MongoClient) Property() { + fmt.Printf("MongoClient Property: \n") + fmt.Printf("Addr: %s\n", mc.Addr) + fmt.Printf("DB: %s\n", mc.DB) + fmt.Printf("Complexity: %d\n", mc.Complexity) +} + +func (mc *MongoClient) InitClient() { + var err error + if mc.DB == "" { + mc.DB = "app_traffic_test" + } + mc.Client, err = mgo.Dial(mc.Addr) + if err != nil { + log.Fatal(fmt.Sprintf("Dial Addr %s Err: %v", mc.Addr, err)) + } + mc.collection = mc.Client.DB(mc.DB).C("test") + _, err = mc.collection.RemoveAll(bson.M{}) + if err != nil { + log.Fatal(err) + } + // init data needed by get func + + builder := common.NewBuilder() + for i := 0; i < mc.Complexity; i++ { + builder = builder.AddString(fmt.Sprintf("Key%d", i)) + } + newStruct := builder.Build().New() + for i := 0; i < mc.Complexity; i++ { + newStruct.SetString(fmt.Sprintf("Key%d", i), fmt.Sprintf("value%d", i)) + } + + err = mc.collection.Insert(newStruct.Addr()) + if err != nil { + log.Fatal(err) + } + mc.isReady = true +} + +func (mc *MongoClient) IsReady() bool { + return mc.isReady +} + +func (mc *MongoClient) Exec() error { + mc.Get() + return nil +} + +func (mc *MongoClient) Get() { + start := time.Now() + var result []bson.M + err := mc.collection.Find(nil).All(&result) + latency := time.Since(start) + if err != nil { + mc.ErrLatencyChan <- &latency + fmt.Println("query error:", err) + } else { + mc.LatencyChan <- &latency + } +} + +func (mc *MongoClient) Close() { + if mc.Client != nil { + mc.Client.Close() + } +} diff --git a/evaluation/eval-bench/client/mysql/mysql-client.go b/evaluation/eval-bench/client/mysql/mysql-client.go new file mode 100644 index 0000000000000000000000000000000000000000..cecbbc6626cd50e547dcf591c85128e5f9706a00 --- /dev/null +++ b/evaluation/eval-bench/client/mysql/mysql-client.go @@ -0,0 +1,146 @@ +package mysql + +import ( + "database/sql" + "fmt" + "log" + "time" + + _ "github.com/go-sql-driver/mysql" +) + +type MysqlClient struct { + values []any + isReady bool + + LatencyChan chan *time.Duration + ErrLatencyChan chan *time.Duration + + Addr string + Password string + User string + DB string + Client *sql.DB + SessionCount int + Complexity int + Sql string +} + +func (mc *MysqlClient) Property() { + log.Println("MySqlClient Property:") + log.Printf("Addr: %s\n", mc.Addr) + log.Printf("User: %s\n", mc.User) + log.Printf("Password: %s\n", mc.Password) + log.Printf("DB: %s\n", mc.DB) + log.Printf("SessionCount: %d\n", mc.SessionCount) + log.Printf("Complexity: %d\n", mc.Complexity) + log.Printf("Sql: %s\n", mc.Sql) +} + +// InitClient 初始化MySQL客户端连接。 +// 此方法会首先尝试创建数据库(如果不存在),然后创建数据库连接,并设置最大连接数和最大空闲连接数。 +// 还会执行一个查询SQL来准备客户端操作。 +func (mc *MysqlClient) InitClient() { + // 如果数据库名称未设置,则默认为"app_traffic_test" + var err error + if mc.DB == "" { + mc.DB = "app_traffic_test" + } + // 构造数据库连接字符串 + dataSourceName := fmt.Sprintf("%s:%s@tcp(%s)/", mc.User, mc.Password, mc.Addr) + // 尝试打开数据库连接 + db, _ := sql.Open("mysql", dataSourceName) + // 创建数据库,如果不存在 + _, err = db.Exec(fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", mc.DB)) + if err != nil { + log.Fatal("create DB error:", err) + } + db.Close() // 关闭临时数据库连接 + + // 打开应用程序使用的数据库连接,并设置最大连接数和最大空闲连接数 + mc.Client, err = sql.Open("mysql", mc.User+":"+mc.Password+"@tcp("+mc.Addr+")/"+mc.DB) + if err != nil { + log.Fatal("create DB error:", err) + } + mc.Client.SetMaxOpenConns(mc.SessionCount) // 设置最大连接数 + mc.Client.SetMaxIdleConns(mc.SessionCount) // 设置保留连接数 + + // 准备查询SQL并执行 + mc.Sql = mc.getQuerySQL() + rows, err := mc.Client.Query(mc.Sql) + if err 
!= nil { + panic(err) + } + defer rows.Close() // 确保查询结果被关闭 + cols, err := rows.Columns() // 获取查询结果的列名 + if err != nil { + panic(err) + } + // 初始化存储查询结果的切片 + mc.values = make([]any, len(cols)) + data := make([][]byte, len(cols)) + + // 为每一列创建一个字节切片,并将其地址赋值给values切片 + for i := range mc.values { + mc.values[i] = &data[i] + } + mc.isReady = true // 标记客户端为就绪状态 +} + +func (mc *MysqlClient) IsReady() bool { + return mc.isReady +} + +func (mc *MysqlClient) Exec() error { + err := mc.QueryTest() + return err +} + +func (mc *MysqlClient) Close() { + if mc.Client != nil { + mc.Client.Close() + } +} + +func (mc *MysqlClient) getQuerySQL() (sql string) { + if mc.Sql != "" { + return mc.Sql + } + sql = "SELECT 0" + for i := 1; i < mc.Complexity; i++ { + sql = fmt.Sprintf("%s, %d", sql, i) + } + return sql +} + +// QueryTest 是 MysqlClient 类的一个方法,用于并发执行查询操作,并统计每个查询的延迟时间。 +// 该方法不接受参数,返回可能发生的错误。 +func (mc *MysqlClient) QueryTest() error { + var err error + rows := make([]*sql.Row, mc.SessionCount) // 创建一个存储查询结果行的切片 + latencys := make([]time.Duration, mc.SessionCount) // 创建一个切片来存储每个查询的延迟时间 + + // 并发执行查询操作,并记录每次查询的延迟时间 + for i := 0; i < mc.SessionCount; i++ { + start := time.Now() + rows[i] = mc.Client.QueryRow(mc.Sql) // 执行查询 + latency := time.Since(start) // 计算查询延迟 + latencys[i] = latency + } + + // 对每个查询结果进行处理,统计总延迟时间,并处理可能发生的错误 + for i := 0; i < mc.SessionCount; i++ { + start := time.Now() + err := rows[i].Scan(mc.values...) // 提取查询结果 + latency := time.Since(start) // 计算处理延迟 + sumLatency := latencys[i] + latency // 计算总延迟时间 + if err != nil { + mc.ErrLatencyChan <- &sumLatency // 如果有错误,将总延迟时间发送到错误延迟通道 + fmt.Println("sql query error 1:", err) // 打印查询错误信息 + } else { + mc.LatencyChan <- &sumLatency // 无错误,将总延迟时间发送到正常延迟通道 + } + } + + return err +} diff --git a/evaluation/eval-bench/client/redis/redis-client.go b/evaluation/eval-bench/client/redis/redis-client.go new file mode 100644 index 0000000000000000000000000000000000000000..d04d406cad44633f445a5e41a05d6c912bd2da5f --- /dev/null +++ b/evaluation/eval-bench/client/redis/redis-client.go @@ -0,0 +1,102 @@ +package redis + +import ( + "fmt" + "strings" + "time" + + "github.com/go-redis/redis" +) + +type RedisClient struct { + isReady bool + + LatencyChan chan *time.Duration + ErrLatencyChan chan *time.Duration + + keys []string + hmap map[string]interface{} + + Addr string + Password string + DB int + Client *redis.Client + Complexity int + Method string +} + +func (rc *RedisClient) Property() { + fmt.Printf("RedisClient Property:\n") + fmt.Printf("Addr: %s\n", rc.Addr) + fmt.Printf("DB: %d\n", rc.DB) + fmt.Printf("Password: %s\n", rc.Password) + fmt.Printf("Complexity: %d\n", rc.Complexity) + fmt.Printf("Method: %s\n", rc.Method) +} + +func (rc *RedisClient) InitClient() { + client := redis.NewClient(&redis.Options{ + Addr: rc.Addr, + Password: rc.Password, + DB: rc.DB, + }) + rc.Client = client + // init data needed by get func + rc.keys = make([]string, rc.Complexity) + rc.hmap = make(map[string]interface{}) + for i := 0; i < rc.Complexity; i++ { + key := fmt.Sprintf("key%d", i) + value := fmt.Sprintf("value%d", i) + rc.keys[i] = key + rc.hmap[key] = value + } + rc.setMap() + rc.isReady = true +} + +func (rc *RedisClient) IsReady() bool { + return rc.isReady +} + +func (rc *RedisClient) setMap() error { + return rc.Client.HMSet("appHash", rc.hmap).Err() +} + +func (rc *RedisClient) Exec() error { + if strings.ToUpper(rc.Method) == "SET" { + rc.set() + } else { + rc.get() + } + return nil +} + +func (rc *RedisClient) set() { + start := time.Now() + err := 
rc.setMap() + latency := time.Since(start) + if err != nil { + rc.ErrLatencyChan <- &latency + fmt.Println("error:", err) + } else { + rc.LatencyChan <- &latency + } +} + +func (rc *RedisClient) get() { + start := time.Now() + _, err := rc.Client.HMGet("appHash", rc.keys...).Result() + latency := time.Since(start) + if err != nil { + rc.ErrLatencyChan <- &latency + fmt.Println("error:", err) + } else { + rc.LatencyChan <- &latency + } +} + +func (rc *RedisClient) Close() { + if rc.Client != nil { + rc.Client.Close() + } +} diff --git a/evaluation/eval-bench/cmd/eb/main.go b/evaluation/eval-bench/cmd/eb/main.go new file mode 100644 index 0000000000000000000000000000000000000000..75ff2a45a6cbea559b9bc71fef7c1aa8b42c07bd --- /dev/null +++ b/evaluation/eval-bench/cmd/eb/main.go @@ -0,0 +1,263 @@ +package main + +import ( + "flag" + "fmt" + "log" + "strings" + "time" + + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/grpc" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/http" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/mongo" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/mysql" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/redis" + "gitlab.yunshan.net/yunshan/evaluation/eval-bench/common" + "go.uber.org/ratelimit" +) + +var SUPPORT_ENGINES = []string{"redis", "mysql", "mongo", "grpc", "h2c", "https", "http"} + +var ( + fhost = flag.String("h", "", "Target host:port") + fpasswd = flag.String("p", "", "DB password") + frate = flag.Int("r", 0, "Packets per second") + fthreads = flag.Int("t", 1, "Number of threads") + fengine = flag.String("e", "", fmt.Sprintf("Engine of protocol %v", SUPPORT_ENGINES)) + fduration = flag.Int("d", 0, "execution time in seconds") + fconcurrent = flag.Int("c", 1, "concurrent connections of each thread") + + fcomplexity = flag.Int("complexity", 1, "complexity of query sql") + fmethod = flag.String("method", "", "method of query, redis:[GET, SET], http2:[GET, POST]") + fsql = flag.String("sql", "", "customizable sql of query, only support mysql") + fdb = flag.String("db", "", "database name, support [redis, mysql, mongo]") + fdataSize = flag.Int("datasize", 1, "body size of http/http2 query") + fkeepalive = flag.Bool("keepalive", true, "keepalive of each http client") +) + +const engineTemplateCmd = ` +Template of Each Engine: + 1. grpc: ./eb -h {host}:{port} -d {duration} -e grpc -r {rate} -t {threads} + 2. redis: ./eb -h {host}:{port} -d {duration} -e redis -r {rate} -t {threads} -p {password} -m {method[GET, SET]} -complexity {complexity(count of keys)} + 3. mysql: ./eb -h {host}:{port} -d {duration} -e mysql -r {rate} -t {threads} -p {password} -c {concurrent connections} -sql {sql} -datasize {body size} -complexity {complexity(len of sql)} + 4. mongo: ./eb -h {host}:{port} -d {duration} -e mongo -r {rate} -t {threads} -p {password} -complexity {complexity(count of keys)} + 5. 
http/https/h2c: ./eb -h {host}:{port} -d {duration} -e {http/https/h2c} -r {rate} -t {threads} -m {method[GET, POST]} -keepalive {keepalive[true,false]} -complexity {complexity(headers count)} -datasize {body size} ` + +func main() { + // output of --help + flag.Usage = func() { + flag.PrintDefaults() + fmt.Println(engineTemplateCmd) + } + flag.Usage() + flag.Parse() + // check flag + if *fhost == "" { + log.Fatal("fhost -h should be assigned") + } + if *frate == 0 { + log.Fatal("frate -r should be assigned") + } + if *fengine == "" || !strings.Contains(strings.Join(SUPPORT_ENGINES, " "), *fengine) { + log.Fatalf(fmt.Sprintf("fengine -e should be assigned %v", SUPPORT_ENGINES)) + } + if *fduration == 0 { + log.Fatal("fduration -d should be assigned") + } + if *fconcurrent > 1 && (*fthreads)*(*fconcurrent)*10 > *frate { + log.Fatal("(fthreads * fconcurrent * 10) should be less than (frate)") + } + if *fcomplexity < 1 { + log.Fatal("fcomplexity should > 0") + } + + engines := make([]client.EngineClient, *fthreads) + var rateTokenCount int // exec count of each token + + rps_rate := (*frate + *fthreads - 1) / *fthreads + rate := rps_rate / *fconcurrent + + startChan := make(chan int, *fthreads) // use to start all thread + stopChan := make(chan int, *fthreads) + endChan := make(chan int, 1) + var startTime time.Time + + latencyChan := make(chan *time.Duration, 10000) + errLatencyChan := make(chan *time.Duration, 10000) + + latencyResult := &common.LatencyResult{} + latencyResult.Init() + // token count per second + // token count = rps_rate / concurrent count of each client / exec count of each token(10 or 5 or 1) + if rate%10 == 0 { + rateTokenCount = rate / 10 + } else if rate%5 == 0 { + rateTokenCount = rate / 5 + } else { + rateTokenCount = rate + } + + for i := 0; i < *fthreads; i++ { + var engineClinet client.EngineClient + if *fengine == "redis" { + engineClinet = &redis.RedisClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Password: *fpasswd, + DB: 0, + Complexity: *fcomplexity, + Method: *fmethod, + } + } else if *fengine == "mysql" { + engineClinet = &mysql.MysqlClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Password: *fpasswd, + DB: *fdb, + User: "root", + SessionCount: *fconcurrent, + Complexity: *fcomplexity, + Sql: *fsql, + } + } else if *fengine == "grpc" { + engineClinet = &grpc.GrpcClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + } + } else if *fengine == "mongo" { + engineClinet = &mongo.MongoClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Password: *fpasswd, + DB: *fdb, + Complexity: *fcomplexity, + } + } else if *fengine == "h2c" { + engineClinet = &http.HttpClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Method: *fmethod, + Complexity: *fcomplexity, + DataSize: *fdataSize, + KeepAlive: *fkeepalive, + H2C: true, + TLS: false, + } + } else if *fengine == "https" { + engineClinet = &http.HttpClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Method: *fmethod, + Complexity: *fcomplexity, + DataSize: *fdataSize, + KeepAlive: *fkeepalive, + H2C: false, + TLS: true, + } + } else if *fengine == "http" { + engineClinet = &http.HttpClient{ + LatencyChan: latencyChan, + ErrLatencyChan: errLatencyChan, + Addr: *fhost, + Method: *fmethod, + Complexity: *fcomplexity, + DataSize: *fdataSize, + KeepAlive: *fkeepalive, + H2C: false, + TLS: false, + } + } + 
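+		// Every branch above assigns a type that satisfies client.EngineClient,
+		// so the benchmark loop below can drive redis/mysql/mongo/grpc/http
+		// traffic through the same InitClient()/Exec()/IsReady()/Close() interface.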
+ engines[i] = engineClinet + if i == 0 { + engineClinet.Property() + } + + // Take 10 tokens each time to avoid too high call frequency of the Take() function + // WithoutSlack cancel maxSlack + + go func(index int) { + engineClinet.InitClient() + defer engineClinet.Close() + log.Printf("[*] Start %s App Traffic %s, date rate %d rps.\n", *fengine, *fhost, rps_rate) + rate_limit := ratelimit.New(rateTokenCount, ratelimit.WithoutSlack) + // exec count of each token + execCount := rate / rateTokenCount + // wait all thread ready + <-startChan + for { + select { + case <-stopChan: + return + default: + rate_limit.Take() + for j := 0; j < execCount; j++ { + engineClinet.Exec() + } + } + } + }(i) + } + + // wait all client ready + for { + ready := true + for i := 0; i < *fthreads; i++ { + if !engines[i].IsReady() { + ready = false + } + } + if ready { + break + } + time.Sleep(time.Duration(100) * time.Millisecond) + } + + // accept latencyfrom exec thread + go func() { + var lt *time.Duration + var elt *time.Duration + for { + select { + case <-endChan: + return + case lt = <-latencyChan: // latencyfrom success exec + latencyResult.Append(lt, false) + case elt = <-errLatencyChan: // latencyfrom error exec + latencyResult.Append(elt, true) + default: + time.Sleep(time.Duration(10) * time.Millisecond) + continue + } + } + }() + times := 0 + // start all clinet + startTime = time.Now() + for i := 0; i < *fthreads; i++ { + startChan <- 1 + } + for { + time.Sleep(time.Duration(1) * time.Second) + fmt.Printf("now request count about %d , errCount about %d, cost time %.6fs\n", latencyResult.Count, latencyResult.ErrCount, time.Since(startTime).Seconds()) + times += 1 + if *fthreads > 0 && times >= *fduration { + for i := 0; i < *fthreads; i++ { + stopChan <- 1 + } + endChan <- 1 + latencyResult.ExecSeconds = time.Since(startTime).Seconds() + break + } + } + // Print result + latencyResult.Print() + +} diff --git a/evaluation/eval-bench/common/results.go b/evaluation/eval-bench/common/results.go new file mode 100644 index 0000000000000000000000000000000000000000..f046f54c1739d124fc6aaf5960a253323f13dba7 --- /dev/null +++ b/evaluation/eval-bench/common/results.go @@ -0,0 +1,55 @@ +package common + +import ( + "fmt" + "sync" + "time" + + "github.com/rcrowley/go-metrics" +) + +type LatencyResult struct { + Count int + ErrCount int + ExecSeconds float64 + + h metrics.Histogram + mutex sync.Mutex +} + +func (lr *LatencyResult) Init() { + lr.h = metrics.NewHistogram(metrics.NewUniformSample(10000000)) + metrics.Register("latency.histogram", lr.h) +} + +func (lr *LatencyResult) Append(latency *time.Duration, isErr bool) error { + lr.mutex.Lock() + defer lr.mutex.Unlock() + lr.h.Update(int64(*latency)) + if isErr { + lr.ErrCount++ + } else { + lr.Count++ + } + return nil +} + +func (lr *LatencyResult) Print() error { + lr.mutex.Lock() + defer lr.mutex.Unlock() + + //calculate all metrics + avg := time.Duration(lr.h.Mean()) + max := time.Duration(lr.h.Max()) + p50 := time.Duration(lr.h.Percentile(0.5)) + p90 := time.Duration(lr.h.Percentile(0.9)) + total := lr.Count + lr.ErrCount + if total < 1 || lr.ExecSeconds == 0 { + fmt.Printf("error: request Count or ExecSeconds = 0") + return nil + } + fmt.Printf("exec duration: %fs\n", lr.ExecSeconds) + fmt.Printf("total: %d, count: %d, error: %d, request/sec: %.2f ", total, lr.Count, lr.ErrCount, float64(total)/lr.ExecSeconds) + fmt.Printf("avg: %v max: %v p50: %v p90: %v \n", avg, max, p50, p90) + return nil +} diff --git 
a/evaluation/eval-bench/common/struct-builder.go b/evaluation/eval-bench/common/struct-builder.go
new file mode 100644
index 0000000000000000000000000000000000000000..c6f515dad04d48090256b4cd3cc0c237586a421d
--- /dev/null
+++ b/evaluation/eval-bench/common/struct-builder.go
@@ -0,0 +1,104 @@
+package common
+
+import (
+	"errors"
+	"reflect"
+)
+
+// Builder collects field definitions for a struct to be built dynamically
+type Builder struct {
+	// stores the fields the struct will be built from
+	fileId []reflect.StructField
+}
+
+func NewBuilder() *Builder {
+	return &Builder{}
+}
+
+// AddField appends a field definition
+func (b *Builder) AddField(field string, typ reflect.Type) *Builder {
+	b.fileId = append(b.fileId, reflect.StructField{Name: field, Type: typ})
+	return b
+}
+
+// Build creates the struct type from the previously added fields
+func (b *Builder) Build() *Struct {
+	stu := reflect.StructOf(b.fileId)
+	index := make(map[string]int)
+	for i := 0; i < stu.NumField(); i++ {
+		index[stu.Field(i).Name] = i
+	}
+	return &Struct{stu, index}
+}
+func (b *Builder) AddString(name string) *Builder {
+	return b.AddField(name, reflect.TypeOf(""))
+}
+
+func (b *Builder) AddBool(name string) *Builder {
+	return b.AddField(name, reflect.TypeOf(true))
+}
+
+func (b *Builder) AddInt64(name string) *Builder {
+	return b.AddField(name, reflect.TypeOf(int64(0)))
+}
+
+func (b *Builder) AddFloat64(name string) *Builder {
+	return b.AddField(name, reflect.TypeOf(float64(1.2)))
+}
+
+// Struct describes the dynamically built struct type
+// and acts as the base for its Instance values
+type Struct struct {
+	typ reflect.Type
+	// maps a field name to its index in Builder's []reflect.StructField
+	index map[string]int
+}
+
+func (s Struct) New() *Instance {
+	return &Instance{reflect.New(s.typ).Elem(), s.index}
+}
+
+// Instance is a value of the dynamically built struct type
+type Instance struct {
+	instance reflect.Value
+	// maps a field name to its field index
+	index map[string]int
+}
+
+func (in Instance) Field(name string) (reflect.Value, error) {
+	if i, ok := in.index[name]; ok {
+		return in.instance.Field(i), nil
+	} else {
+		return reflect.Value{}, errors.New("field does not exist")
+	}
+}
+func (in *Instance) SetString(name, value string) {
+	if i, ok := in.index[name]; ok {
+		in.instance.Field(i).SetString(value)
+	}
+}
+
+func (in *Instance) SetBool(name string, value bool) {
+	if i, ok := in.index[name]; ok {
+		in.instance.Field(i).SetBool(value)
+	}
+}
+
+func (in *Instance) SetInt64(name string, value int64) {
+	if i, ok := in.index[name]; ok {
+		in.instance.Field(i).SetInt(value)
+	}
+}
+
+func (in *Instance) SetFloat64(name string, value float64) {
+	if i, ok := in.index[name]; ok {
+		in.instance.Field(i).SetFloat(value)
+	}
+}
+func (i *Instance) Interface() interface{} {
+	return i.instance.Interface()
+}
+
+func (i *Instance) Addr() interface{} {
+	return i.instance.Addr().Interface()
+}
diff --git a/evaluation/eval-bench/go.mod b/evaluation/eval-bench/go.mod
new file mode 100644
index 0000000000000000000000000000000000000000..7c0e746730342a88c3fb518af3b41f5ebace14d0
--- /dev/null
+++ b/evaluation/eval-bench/go.mod
@@ -0,0 +1,45 @@
+module gitlab.yunshan.net/yunshan/evaluation/eval-bench
+
+go 1.20
+
+require (
+	github.com/IBM/sarama v1.43.2
+	github.com/go-redis/redis v6.15.9+incompatible
+	github.com/go-sql-driver/mysql v1.7.1
+	github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475
+	github.com/satori/go.uuid v1.2.0
+	go.uber.org/ratelimit v0.3.0
+	golang.org/x/net v0.24.0
+	google.golang.org/grpc v1.62.0
+	google.golang.org/protobuf v1.32.0
+	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22
+)
+
+require (
+	github.com/benbjohnson/clock v1.3.0 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/eapache/go-resiliency v1.6.0 // indirect
+	github.com/eapache/go-xerial-snappy
v0.0.0-20230731223053-c322873962e3 // indirect + github.com/eapache/queue v1.1.0 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/go-uuid v1.0.3 // indirect + github.com/jcmturner/aescts/v2 v2.0.0 // indirect + github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect + github.com/jcmturner/gofork v1.7.6 // indirect + github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect + github.com/jcmturner/rpc/v2 v2.0.3 // indirect + github.com/klauspost/compress v1.17.8 // indirect + github.com/kr/text v0.1.0 // indirect + github.com/onsi/ginkgo v1.16.5 // indirect + github.com/onsi/gomega v1.31.1 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect + golang.org/x/crypto v0.22.0 // indirect + golang.org/x/sys v0.19.0 // indirect + golang.org/x/text v0.14.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect + gopkg.in/yaml.v2 v2.3.0 // indirect +) diff --git a/evaluation/eval-bench/go.sum b/evaluation/eval-bench/go.sum new file mode 100644 index 0000000000000000000000000000000000000000..9ca84c16753ba8b7696dc432ec7e1df566f2a247 --- /dev/null +++ b/evaluation/eval-bench/go.sum @@ -0,0 +1,189 @@ +github.com/IBM/sarama v1.43.2 h1:HABeEqRUh32z8yzY2hGB/j8mHSzC/HA9zlEjqFNCzSw= +github.com/IBM/sarama v1.43.2/go.mod h1:Kyo4WkF24Z+1nz7xeVUFWIuKVV8RS3wM8mkvPKMdXFQ= +github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= +github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/eapache/go-resiliency v1.6.0 h1:CqGDTLtpwuWKn6Nj3uNUdflaq+/kIPsg0gfNzHton30= +github.com/eapache/go-resiliency v1.6.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 h1:Oy0F4ALJ04o5Qqpdz8XLIpNA3WM/iSIXqxtqo7UGVws= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3/go.mod h1:YvSRo5mw33fLEx1+DlK6L2VV43tJt5Eyel9n9XBcR+0= +github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= +github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= +github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/klauspost/compress v1.17.8 
h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= +github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= +github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/ratelimit v0.3.0 h1:IdZd9wqvFXnvLvSEBo0KPcGfkoBGNkpTHlrE3Rcjkjw= +go.uber.org/ratelimit v0.3.0/go.mod h1:So5LG7CV1zWpY1sHe+DXTJqQvOx+FFPFaAs2SnoyBaI= 
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= +google.golang.org/grpc v1.62.0 h1:HQKZ/fa1bXkX1oFOvSjmZEUL8wLSaZTjCcLAlmZRtdk= +google.golang.org/grpc v1.62.0/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod 
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw= +gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/evaluation/eval-bench/pkg/centos/eval-bench.spec b/evaluation/eval-bench/pkg/centos/eval-bench.spec new file mode 100644 index 0000000000000000000000000000000000000000..dbde362aa9011d4a3269075700d01686ffd684ca --- /dev/null +++ b/evaluation/eval-bench/pkg/centos/eval-bench.spec @@ -0,0 +1,29 @@ +Name: eval-bench +Version: 1.0 +Release: %(git rev-list --count HEAD)%{?dist} +Summary: deepflow eval-bench + +Group: Applications/File +Vendor: Yunshan Networks +License: Copyright (c) 2012-2016 Yunshan Netwoks +URL: http://yunshan.net +Source: eval-bench.spec + +Autoreq: 0 + +%define pwd %(echo $PWD) + +%description +Deepflow Eval Bench + +%prep +mkdir -p $RPM_BUILD_ROOT/usr/sbin/ +cp %pwd/bin/x86_64/eb $RPM_BUILD_ROOT/usr/sbin/ + +%files +/usr/sbin/eb + +%changelog + + + diff --git a/evaluation/eval-bench/test_server/grpcserver/grpc-server.go b/evaluation/eval-bench/test_server/grpcserver/grpc-server.go new file mode 100644 index 0000000000000000000000000000000000000000..dd55c6eb390ea4a3074bc5fddb2ec1c4dcea1f6b --- /dev/null +++ b/evaluation/eval-bench/test_server/grpcserver/grpc-server.go @@ -0,0 +1,70 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package main implements a server for Greeter service. 
+package main + +import ( + "context" + "flag" + "fmt" + "log" + "net" + + pb "gitlab.yunshan.net/yunshan/evaluation/eval-bench/client/grpc/pb" + "google.golang.org/grpc" +) + +var ( + port = flag.Int("port", 50051, "The server port") +) + +// server is used to implement helloworld.GreeterServer. +type server struct { + pb.UnimplementedGreeterServer +} + +// SayHello implements helloworld.GreeterServer +func (s *server) SayHello(ctx context.Context, in *pb.HelloRequest) (*pb.HelloReply, error) { + return &pb.HelloReply{ + Message1: "Hello 1" + in.GetName1(), + Message2: "Hello 2" + in.GetName2(), + Message3: "Hello 3" + in.GetName3(), + Message4: "Hello 4" + in.GetName4(), + Message5: "Hello 5" + in.GetName5(), + Message6: "Hello 6" + in.GetName6(), + Message7: "Hello 7" + in.GetName7(), + Message8: "Hello 8" + in.GetName8(), + Message9: "Hello 9" + in.GetName9(), + Message10: "Hello 10" + in.GetName10(), + }, nil +} + +func main() { + flag.Parse() + lis, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) + if err != nil { + log.Fatalf("failed to listen: %v", err) + } + s := grpc.NewServer() + pb.RegisterGreeterServer(s, &server{}) + log.Printf("server listening at %v", lis.Addr()) + if err := s.Serve(lis); err != nil { + log.Fatalf("failed to serve: %v", err) + } +} diff --git a/evaluation/eval-charts/evaluation-controller/.helmignore b/evaluation/eval-charts/evaluation-controller/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/evaluation/eval-charts/evaluation-controller/Chart.yaml b/evaluation/eval-charts/evaluation-controller/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbdcfdf5882c94453ecfc846335f40b13df1c117 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/Chart.yaml @@ -0,0 +1,34 @@ +apiVersion: v2 +name: evaluation +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" +dependencies: +- name: mysql + repository: "" + version: '*.*.*' +- name: redis + repository: "" + version: '*.*.*' +- name: web + repository: "" + version: '*.*.*' \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/.helmignore b/evaluation/eval-charts/evaluation-controller/charts/mysql/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/Chart.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b678b5289677bb9227857cd75a02255ffb458193 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: mysql +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.000 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "8.0.26" diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_affinity.tpl b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_affinity.tpl new file mode 100644 index 0000000000000000000000000000000000000000..9c3d0dfbc77d1c2f0e045f492c318a92befd5a1e --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_affinity.tpl @@ -0,0 +1,255 @@ +{{/* affinity - https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ */}} + +{{- define "nodeaffinity" }} + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "nodeAffinityRequiredDuringScheduling" . }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "nodeAffinityPreferredDuringScheduling" . 
}} +{{- end }} + +{{- define "nodeAffinityRequiredDuringScheduling" }} + {{- if or .Values.nodeAffinityLabelSelector .Values.global.nodeAffinityLabelSelector }} + nodeSelectorTerms: + {{- range $matchExpressionsIndex, $matchExpressionsItem := .Values.nodeAffinityLabelSelector }} + - matchExpressions: + {{- range $Index, $item := $matchExpressionsItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- range $matchExpressionsIndex, $matchExpressionsItem := .Values.global.nodeAffinityLabelSelector }} + - matchExpressions: + {{- range $Index, $item := $matchExpressionsItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} + +{{- define "nodeAffinityPreferredDuringScheduling" }} + {{- range $weightIndex, $weightItem := .Values.nodeAffinityTermLabelSelector }} + - weight: {{ $weightItem.weight }} + preference: + matchExpressions: + {{- range $Index, $item := $weightItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- range $weightIndex, $weightItem := .Values.global.nodeAffinityTermLabelSelector }} + - weight: {{ $weightItem.weight }} + preference: + matchExpressions: + {{- range $Index, $item := $weightItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} + + +{{- define "podAffinity" }} +{{- if or .Values.podAffinityLabelSelector .Values.podAffinityTermLabelSelector}} + podAffinity: + {{- if .Values.podAffinityLabelSelector }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "podAffinityRequiredDuringScheduling" . }} + {{- end }} + {{- if or .Values.podAffinityTermLabelSelector}} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "podAffinityPreferredDuringScheduling" . 
}} + {{- end }} +{{- end }} +{{- end }} + +{{- define "podAffinityRequiredDuringScheduling" }} + {{- range $labelSelector, $labelSelectorItem := .Values.podAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} + {{- range $labelSelector, $labelSelectorItem := .Values.global.podAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} +{{- end }} + +{{- define "podAffinityPreferredDuringScheduling" }} + {{- range $labelSelector, $labelSelectorItem := .Values.podAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} + {{- range $labelSelector, $labelSelectorItem := .Values.global.podAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} +{{- end }} + +{{- define "podAntiAffinity" }} +{{- if or .Values.podAntiAffinityLabelSelector .Values.podAntiAffinityTermLabelSelector}} + podAntiAffinity: + {{- if .Values.podAntiAffinityLabelSelector }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "podAntiAffinityRequiredDuringScheduling" . }} + {{- end }} + {{- if or .Values.podAntiAffinityTermLabelSelector}} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "podAntiAffinityPreferredDuringScheduling" . 
}} + {{- end }} +{{- end }} +{{- end }} + +{{- define "podAntiAffinityRequiredDuringScheduling" }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.podAntiAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.global.podAntiAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} +{{- end }} + +{{- define "podAntiAffinityPreferredDuringScheduling" }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.podAntiAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.global.podAntiAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_helpers.tpl b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..4ff7faa999175638fe7925e45fdce72e5933f5c5 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/_helpers.tpl @@ -0,0 +1,54 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "mysql.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "mysql.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "mysql.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "mysql.labels" -}} +helm.sh/chart: {{ include "mysql.chart" . }} +{{ include "mysql.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "mysql.selectorLabels" -}} +app: evaluation +component: mysql +app.kubernetes.io/name: {{ include "mysql.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/configmap.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9cf1d1f074ed1f0d8c63a43cb8203834a669093 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/configmap.yaml @@ -0,0 +1,32 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ include "mysql.fullname" . }} + labels: + {{- include "mysql.labels" . | nindent 4 }} +data: + my.cnf: |- + [client] + default-character-set=utf8 + + [mysqld] + default-authentication-plugin=mysql_native_password + + # Network related + bind-address=* + port=30130 + + # Enable query cache + innodb_buffer_pool_size=20M + max_connections=1000 + wait_timeout=28800 + + # Replication related + slave_skip_errors=all + + ## Binlog expiration time, default 1 days + binlog_expire_logs_seconds = 86400 + init.sql: |- + ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY '{{ tpl $.Values.password . }}'; + CREATE USER IF NOT EXISTS 'root'@'%' IDENTIFIED WITH mysql_native_password BY '{{ tpl $.Values.password . }}'; + GRANT ALL ON *.* TO 'root'@'%' WITH GRANT OPTION; diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/deployment.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4d81856a6c0ccc19cc5d5fddd0464bbab2088a6 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/deployment.yaml @@ -0,0 +1,129 @@ +{{- if not .Values.externalMySQL.enabled }} +{{- if and (not $.Values.global.allInOneLocalStorage ) (eq ( tpl $.Values.storageConfig.generateType . ) "hostPath") (not $.Values.nodeAffinityLabelSelector) }} +{{- fail "You must set nodeAffinityLabelSelector" -}} +{{- end}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "mysql.fullname" . }} + labels: + {{- include "mysql.labels" . | nindent 4 }} +spec: + replicas: {{ tpl (toString .Values.replicas) . }} + strategy: + type: Recreate + selector: + matchLabels: + {{- include "mysql.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . 
| sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mysql.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + hostNetwork: {{ tpl (toString .Values.hostNetwork) . }} + dnsPolicy: {{ tpl .Values.dnsPolicy . }} + imagePullSecrets: + {{- with .Values.global.imagePullSecrets }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imagePullSecrets }} + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if and (eq ( tpl $.Values.storageConfig.generateType . ) "hostPath") ($.Values.storageConfig.hostPathChownContainerEnabled) }} + {{- if .Values.chmodContainer.enabled }} + initContainers: + - name: hostpath-chown + image: "{{ tpl .Values.image.repository . }}:{{ tpl (toString .Values.image.tag) . }}" + securityContext: + runAsNonRoot: false + runAsUser: 0 + command: + - chown + - 27:27 + - /var/lib/mysql + volumeMounts: + - mountPath: /var/lib/mysql + name: data-path + {{- end }} + {{- end }} + containers: + - name: mysql + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ tpl .Values.image.repository . }}:{{ tpl (toString .Values.image.tag) . }}" + imagePullPolicy: {{ tpl .Values.image.pullPolicy . }} + ports: + - name: tcp + containerPort: 30130 + protocol: TCP + livenessProbe: + {{ toYaml .Values.livenessProbe | nindent 12 }} + readinessProbe: + {{ toYaml .Values.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + env: + - name: MYSQL_ROOT_PASSWORD + value: {{ tpl .Values.password . }} + - name: MYSQL_DATABASE + value: {{ tpl .Values.db . }} + volumeMounts: + - name: mysql-config + mountPath: /etc/my.cnf + subPath: my.cnf + - name: mysql-config-init-sql + mountPath: /docker-entrypoint-initdb.d/init.sql + subPath: init.sql + - mountPath: /var/lib/mysql + name: data-path + volumes: + - name: mysql-config + configMap: + name: {{ include "mysql.fullname" . }} + items: + - key: my.cnf + path: my.cnf + - name: mysql-config-init-sql + configMap: + name: {{ include "mysql.fullname" . }} + items: + - key: init.sql + path: init.sql + - name: data-path + {{- if eq ( tpl .Values.storageConfig.generateType . ) "persistentVolumeClaim" }} + persistentVolumeClaim: + claimName: {{ .Values.storageConfig.persistence.existingClaim | default (printf "%s-data-pvc" (include "mysql.fullname" . )) }} + {{- end }} + {{- if eq ( tpl .Values.storageConfig.generateType . ) "hostPath" }} + hostPath: + type: DirectoryOrCreate + path: {{ tpl .Values.storageConfig.hostPath . }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + affinity: + {{- include "nodeaffinity" . | indent 6 }} + {{- include "podAffinity" . | indent 6 }} + {{- include "podAntiAffinity" . 
| indent 6 }} + {{- if or .Values.global.tolerations .Values.tolerations }} + tolerations: + {{- if .Values.global.tolerations }} + {{- toYaml .Values.global.tolerations | nindent 8 }} + {{- end }} + {{- if .Values.tolerations }} + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/pvc.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/pvc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..425d27e0140965cd38c3a498a5bf0fe5a615d132 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/pvc.yaml @@ -0,0 +1,19 @@ +{{- if and (eq ( tpl $.Values.storageConfig.generateType . ) "persistentVolumeClaim") (not .Values.storageConfig.persistence.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "mysql.fullname" . }}-data-pvc + labels: + {{- include "mysql.labels" . | nindent 4 }} + annotations: + {{- toYaml .Values.storageConfig.persistence.annotations | nindent 4 }} +spec: + {{- if (tpl .Values.storageConfig.persistence.storageClass .) }} + storageClassName: {{ tpl .Values.storageConfig.persistence.storageClass . | quote }} + {{- end }} + accessModes: + - {{ .Values.storageConfig.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.storageConfig.persistence.size | quote }} +{{- end -}} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/service.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5055787d6bee56239292c4d8a8a7a2a6aaef2a91 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/templates/service.yaml @@ -0,0 +1,49 @@ + + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mysql.fullname" . }} + labels: + {{- include "mysql.labels" . | nindent 4 }} +{{- if .Values.service.annotations }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} +{{- end }} +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- $serviceType := .Values.service.type -}} + {{- range .Values.service.ports }} + - name: {{ tpl .name $ }} + port: {{ tpl (toString .port) $ }} + targetPort: {{ tpl (toString .targetPort) $ }} + {{- if and (eq $serviceType "NodePort") (.nodePort) }} + nodePort: {{ tpl (toString .nodePort) $ }} + {{- end }} + protocol: {{ tpl .protocol $ }} + {{- end }} +{{- if .Values.service.additionalPorts }} +{{ toYaml .Values.service.additionalPorts | indent 2 }} +{{- end }} + selector: + {{- include "mysql.selectorLabels" . 
| nindent 4 }}
+  type: "{{ .Values.service.type }}"
\ No newline at end of file
diff --git a/evaluation/eval-charts/evaluation-controller/charts/mysql/values.yaml b/evaluation/eval-charts/evaluation-controller/charts/mysql/values.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b304d99a9ec07a4989ec165a2c319faf2e9ca97
--- /dev/null
+++ b/evaluation/eval-charts/evaluation-controller/charts/mysql/values.yaml
@@ -0,0 +1,123 @@
+# Default values for mysql.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+global:
+  podAntiAffinityLabelSelector: []
+  podAntiAffinityTermLabelSelector: []
+  podAffinityLabelSelector: []
+  podAffinityTermLabelSelector: []
+  nodeAffinityLabelSelector: []
+  nodeAffinityTermLabelSelector: []
+
+replicas: 1
+
+image:
+  repository: mysql
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: 8.0.31
+
+timezone: "Asia/Shanghai"
+hostNetwork: "false"
+dnsPolicy: ClusterFirst
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+password: "{{ $.Values.global.mysql.password }}"
+db: "{{ $.Values.global.mysql.db }}"
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+readinessProbe:
+  tcpSocket:
+    port: 30130
+  failureThreshold: 6
+  initialDelaySeconds: 15
+  periodSeconds: 10
+  successThreshold: 1
+livenessProbe:
+  failureThreshold: 6
+  initialDelaySeconds: 15
+  periodSeconds: 20
+  successThreshold: 1
+  tcpSocket:
+    port: 30130
+  timeoutSeconds: 1
+
+chmodContainer:
+  enabled: true
+securityContext: {}
+storageConfig:
+  generateType: "{{ if $.Values.global.allInOneLocalStorage }}hostPath{{ else }}{{$.Values.storageConfig.type}}{{end}}" #Please ignore this
+  ## persistentVolumeClaim/hostPath
+  type: persistentVolumeClaim
+  hostPath: /opt/evaluation-mysql
+  persistence:
+    storageClass: ""
+    annotations:
+      "helm.sh/resource-policy": keep
+    # existingClaim: your-claim-pvc-name
+    accessMode: ReadWriteOnce
+    size: 8Gi
+
+externalMySQL:
+  enabled: false
+  hostIP: 192.168.1.1
+  port: 30130
+
+service:
+  ## Configuration for the MySQL service
+  ##
+  annotations: {}
+  labels: {}
+  clusterIP: ""
+
+  ## Port for the MySQL service to listen on
+  ##
+
+  ports:
+    - name: tcp
+      port: 30130
+      targetPort: 30130
+      nodePort:
+      protocol: TCP
+  ## Additional ports to open on the MySQL service
+  additionalPorts: []
+
+  externalIPs: []
+  loadBalancerIP: ""
+  loadBalancerSourceRanges: []
+
+  ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints
+  ## Valid values: Cluster or Local
+  externalTrafficPolicy: Cluster
+
+  ## Service type
+  ##
+  type: ClusterIP
+
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +podAntiAffinityLabelSelector: [] +podAntiAffinityTermLabelSelector: [] +podAffinityLabelSelector: [] +podAffinityTermLabelSelector: [] +nodeAffinityLabelSelector: [] +nodeAffinityTermLabelSelector: [] + diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/.helmignore b/evaluation/eval-charts/evaluation-controller/charts/redis/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/Chart.yaml b/evaluation/eval-charts/evaluation-controller/charts/redis/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90588c7d1b6cca5be61da96b4fa191e818f1bbec --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: redis +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "7.2.4" diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_affinity.tpl b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_affinity.tpl new file mode 100644 index 0000000000000000000000000000000000000000..9c3d0dfbc77d1c2f0e045f492c318a92befd5a1e --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_affinity.tpl @@ -0,0 +1,255 @@ +{{/* affinity - https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ */}} + +{{- define "nodeaffinity" }} + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "nodeAffinityRequiredDuringScheduling" . }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "nodeAffinityPreferredDuringScheduling" . 
}} +{{- end }} + +{{- define "nodeAffinityRequiredDuringScheduling" }} + {{- if or .Values.nodeAffinityLabelSelector .Values.global.nodeAffinityLabelSelector }} + nodeSelectorTerms: + {{- range $matchExpressionsIndex, $matchExpressionsItem := .Values.nodeAffinityLabelSelector }} + - matchExpressions: + {{- range $Index, $item := $matchExpressionsItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- range $matchExpressionsIndex, $matchExpressionsItem := .Values.global.nodeAffinityLabelSelector }} + - matchExpressions: + {{- range $Index, $item := $matchExpressionsItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} + +{{- define "nodeAffinityPreferredDuringScheduling" }} + {{- range $weightIndex, $weightItem := .Values.nodeAffinityTermLabelSelector }} + - weight: {{ $weightItem.weight }} + preference: + matchExpressions: + {{- range $Index, $item := $weightItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- range $weightIndex, $weightItem := .Values.global.nodeAffinityTermLabelSelector }} + - weight: {{ $weightItem.weight }} + preference: + matchExpressions: + {{- range $Index, $item := $weightItem.matchExpressions }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} + + +{{- define "podAffinity" }} +{{- if or .Values.podAffinityLabelSelector .Values.podAffinityTermLabelSelector}} + podAffinity: + {{- if .Values.podAffinityLabelSelector }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "podAffinityRequiredDuringScheduling" . }} + {{- end }} + {{- if or .Values.podAffinityTermLabelSelector}} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "podAffinityPreferredDuringScheduling" . 
}} + {{- end }} +{{- end }} +{{- end }} + +{{- define "podAffinityRequiredDuringScheduling" }} + {{- range $labelSelector, $labelSelectorItem := .Values.podAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} + {{- range $labelSelector, $labelSelectorItem := .Values.global.podAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} +{{- end }} + +{{- define "podAffinityPreferredDuringScheduling" }} + {{- range $labelSelector, $labelSelectorItem := .Values.podAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} + {{- range $labelSelector, $labelSelectorItem := .Values.global.podAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} +{{- end }} + +{{- define "podAntiAffinity" }} +{{- if or .Values.podAntiAffinityLabelSelector .Values.podAntiAffinityTermLabelSelector}} + podAntiAffinity: + {{- if .Values.podAntiAffinityLabelSelector }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- include "podAntiAffinityRequiredDuringScheduling" . }} + {{- end }} + {{- if or .Values.podAntiAffinityTermLabelSelector}} + preferredDuringSchedulingIgnoredDuringExecution: + {{- include "podAntiAffinityPreferredDuringScheduling" . 
}} + {{- end }} +{{- end }} +{{- end }} + +{{- define "podAntiAffinityRequiredDuringScheduling" }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.podAntiAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.global.podAntiAffinityLabelSelector }} + - labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + {{- end }} +{{- end }} + +{{- define "podAntiAffinityPreferredDuringScheduling" }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.podAntiAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} + {{- range $labelSelectorIndex, $labelSelectorItem := .Values.global.podAntiAffinityTermLabelSelector }} + - podAffinityTerm: + labelSelector: + matchExpressions: + {{- range $index, $item := $labelSelectorItem.labelSelector }} + - key: {{ $item.key }} + operator: {{ $item.operator }} + {{- if $item.values }} + values: + {{- $vals := split "," $item.values }} + {{- range $i, $v := $vals }} + - {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + topologyKey: {{ $labelSelectorItem.topologyKey }} + weight: {{ $labelSelectorItem.weight }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_helpers.tpl b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..0da2ab7e96c5484969d62a47d189fc6cb9214da6 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/_helpers.tpl @@ -0,0 +1,55 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "redis.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "redis.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "redis.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "redis.labels" -}} +helm.sh/chart: {{ include "redis.chart" . }} +{{ include "redis.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "redis.selectorLabels" -}} +app: evaluation +component: redis +app.kubernetes.io/name: {{ include "redis.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/templates/deployment.yaml b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d2fd94b24df93289e5efe68783d781cc36c651 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "redis.fullname" . }} + labels: + {{- include "redis.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "redis.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "redis.labels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: tcp + containerPort: 6379 + protocol: TCP + command: + - "redis-server" + - "--requirepass" + - "{{ .Values.global.redis.password }}" + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/templates/service.yaml b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ae273928db9d4a24bf166d791dd67fec986bfc7 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/templates/service.yaml @@ -0,0 +1,45 @@ + + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "redis.fullname" . }} + labels: + {{- include "redis.labels" . 
| nindent 4 }} +spec: +{{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} +{{- end }} +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- $serviceType := .Values.service.type -}} + {{- range .Values.service.ports }} + - name: {{ tpl .name $ }} + port: {{ tpl (toString .port) $ }} + targetPort: {{ tpl (toString .targetPort) $ }} + {{- if and (eq $serviceType "NodePort") (.nodePort) }} + nodePort: {{ tpl (toString .nodePort) $ }} + {{- end }} + protocol: {{ tpl .protocol $ }} + {{- end }} +{{- if .Values.service.additionalPorts }} +{{ toYaml .Values.service.additionalPorts | indent 2 }} +{{- end }} + selector: + {{- include "redis.selectorLabels" . | nindent 4 }} + type: "{{ .Values.service.type }}" \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/redis/values.yaml b/evaluation/eval-charts/evaluation-controller/charts/redis/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f181262ac2e36c0195a57eaa3beda1b97e79c2d --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/redis/values.yaml @@ -0,0 +1,113 @@ +# Default values for redis. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +global: + podAntiAffinityLabelSelector: [] + podAntiAffinityTermLabelSelector: [] + podAffinityLabelSelector: [] + podAffinityTermLabelSelector: [] + nodeAffinityLabelSelector: [] + nodeAffinityTermLabelSelector: [] + +replicas: 1 + +image: + repository: redis + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: 7.2.4 + +timezone: "Asia/Shanghai" +hostNetwork: "false" +dnsPolicy: ClusterFirst +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +password: "{{ $.Values.global.redis.password }}" +db: "{{ $.Values.global.redis.db }}" +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +readinessProbe: + tcpSocket: + port: 6379 + failureThreshold: 6 + initialDelaySeconds: 15 + periodSeconds: 10 + successThreshold: 1 + +livenessProbe: + failureThreshold: 6 + initialDelaySeconds: 15 + periodSeconds: 20 + successThreshold: 1 + tcpSocket: + port: 6379 + timeoutSeconds: 1 + +chmodContainer: + enabled: true +securityContext: {} + +externalRedis: + enabled: false + hostIP: 192.168.1.1 + port: 6379 + +service: + ## Configuration for Clickhouse service + ## + annotations: {} + labels: {} + clusterIP: "" + + ## Port for Clickhouse Service to listen on + ## + + ports: + - name: tcp + port: 6379 + targetPort: 6379 + nodePort: + protocol: TCP + ## Additional ports to open for server service + additionalPorts: [] + + externalIPs: [] + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## must be Local + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +podAntiAffinityLabelSelector: [] +podAntiAffinityTermLabelSelector: [] +podAffinityLabelSelector: [] +podAffinityTermLabelSelector: [] +nodeAffinityLabelSelector: [] +nodeAffinityTermLabelSelector: [] + diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/.helmignore b/evaluation/eval-charts/evaluation-controller/charts/web/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/Chart.yaml b/evaluation/eval-charts/evaluation-controller/charts/web/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ebf7fa98bb200d9885582e2eb36aeac86f3669 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: web +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. 
They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.0.0" diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/templates/_helpers.tpl b/evaluation/eval-charts/evaluation-controller/charts/web/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..604302cc9ec69c248fe95afca99285eae08009c9 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/templates/_helpers.tpl @@ -0,0 +1,55 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "web.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "web.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "web.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "web.labels" -}} +helm.sh/chart: {{ include "web.chart" . }} +{{ include "web.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "web.selectorLabels" -}} +app: evaluation +component: web +app.kubernetes.io/name: {{ include "web.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/templates/deployment.yaml b/evaluation/eval-charts/evaluation-controller/charts/web/templates/deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a06b42e9ef8489718ae67ea2b5e91b09270c81d --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/templates/deployment.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "web.fullname" . }} + labels: + {{- include "web.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "web.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + labels: + {{- include "web.labels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: tcp + containerPort: 80 + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/templates/service.yaml b/evaluation/eval-charts/evaluation-controller/charts/web/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..161a330d57f3f43827df8da13faf1c546c53e051 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/templates/service.yaml @@ -0,0 +1,23 @@ + + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "web.fullname" . }} + labels: + {{- include "web.labels" . | nindent 4 }} +spec: + ports: + {{- $serviceType := .Values.service.type -}} + {{- range .Values.service.ports }} + - name: {{ tpl .name $ }} + port: {{ tpl (toString .port) $ }} + targetPort: {{ tpl (toString .targetPort) $ }} + {{- if and (eq $serviceType "NodePort") (.nodePort) }} + nodePort: {{ tpl (toString .nodePort) $ }} + {{- end }} + protocol: {{ tpl .protocol $ }} + {{- end }} + selector: + {{- include "web.selectorLabels" . | nindent 4 }} + type: "{{ .Values.service.type }}" \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/charts/web/values.yaml b/evaluation/eval-charts/evaluation-controller/charts/web/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a839e39f4339106b015c1b1108437e39a34adbdd --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/charts/web/values.yaml @@ -0,0 +1,69 @@ +# Default values for redis. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +global: + podAntiAffinityLabelSelector: [] + podAntiAffinityTermLabelSelector: [] + podAffinityLabelSelector: [] + podAffinityTermLabelSelector: [] + nodeAffinityLabelSelector: [] + nodeAffinityTermLabelSelector: [] + +replicas: 1 + +image: + repository: hub.deepflow.yunshan.net/public/ys-auto-test-web + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. + tag: "master" + +timezone: "Asia/Shanghai" +hostNetwork: "false" +dnsPolicy: ClusterFirst +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + + +chmodContainer: + enabled: true +securityContext: {} + + +service: + type: NodePort + ports: + - name: tcp + port: 20804 + targetPort: 20804 + nodePort: 30080 + protocol: TCP + + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +podAntiAffinityLabelSelector: [] +podAntiAffinityTermLabelSelector: [] +podAffinityLabelSelector: [] +podAffinityTermLabelSelector: [] +nodeAffinityLabelSelector: [] +nodeAffinityTermLabelSelector: [] + diff --git a/evaluation/eval-charts/evaluation-controller/eval-controller.yaml b/evaluation/eval-charts/evaluation-controller/eval-controller.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33e485653b466484c5121f37da68175ee8a6c460 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/eval-controller.yaml @@ -0,0 +1,58 @@ +global: + # 允许allInOne模式,将数据库数据 挂载至opt/ + allInOneLocalStorage: true + +image: + repository: hub.deepflow.yunshan.net/public/eval-controller + pullPolicy: Always + tag: "latest" + +controllerConfig: + max_runner_num: 2 + # [必填] controller以及执行测试例主机ssh信息 + global_ssh_port: + global_ssh_username: + global_ssh_password: + # HTTP Listen Port + listen_port: 10083 + # controller组件日志目录(挂载本地目录) + log_dir: /var/evaluation + # [必填] controller组件位于的主机ip + local_host_ip: "" + # 数据存储目录(挂载本地目录) + runner_data_dir: /var/log/evalutation + agent-tools: + deepflowce: + name: "Deepflow-Agent(CE)" + # deploy_type: k8s / workload + deploy_type: k8s + # deepflow是否添加了云平台信息 + server_add_cloud: 0 + cloud_info: + vpc_name: + domain_name: + # [必填]deepflow所在主机ssh信息 + server_ip: + server_ssh_port: + server_ssh_username: + server_ssh_password: + version: latest + # deepflow采集器配置 + config: + max_cpus: 1 + max_memory: 1024 + # [必填]若未填写platform-tools 则fixed_host必填,目前只支持fixed_host,ssh信息需与global_shh相同 + fixed_host: + # performance_analysis用例组所用打流机器ip,需提前安装wrk2 + performance_analysis_traffic_ip: + # performance_analysis用例组已部署nginx机器ip,需提前安装nginx + performance_analysis_nginx_ip: + # performance_analysis用例组已部署istio机器ip,需提前安装istio + performance_analysis_istio_ip: + # 云平台AK信息,未支持 + platform-tools: + type: + aliyun: + access_key: + secret_key: + region: diff --git a/evaluation/eval-charts/evaluation-controller/templates/_helpers.tpl b/evaluation/eval-charts/evaluation-controller/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..6aa11038bb5ebf69a871765b8ad7aaf82d22dfac --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/templates/_helpers.tpl @@ -0,0 +1,50 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "evaluation-controller.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "evaluation-controller.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "evaluation-controller.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "evaluation-controller.labels" -}} +helm.sh/chart: {{ include "evaluation-controller.chart" . }} +{{ include "evaluation-controller.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "evaluation-controller.selectorLabels" -}} +app: evaluation +component: evaluation-controller +app.kubernetes.io/name: {{ include "evaluation-controller.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + diff --git a/evaluation/eval-charts/evaluation-controller/templates/configmap.yaml b/evaluation/eval-charts/evaluation-controller/templates/configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9ed7e6cd94e52a645d2b6a2d15a014686bb3560 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/templates/configmap.yaml @@ -0,0 +1,140 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ include "evaluation-controller.fullname" . }} + labels: + {{- include "evaluation-controller.labels" . | nindent 4 }} +data: + controllerConfig: | + {{- range $elem, $elemVal := index .Values "controllerConfig" -}} + {{- if not (kindIs "map" $elemVal) -}} + {{- if kindIs "invalid" $elemVal -}} + {{- nindent 4 $elem -}}: + {{- else if kindIs "string" $elemVal -}} + {{- if (tpl $elemVal $ ) -}} + {{- nindent 4 $elem }}: {{ tpl $elemVal $ -}} + {{- else -}} + {{- nindent 4 $elem }}: + {{- end -}} + {{- else if kindIs "float64" $elemVal -}} + {{- nindent 4 $elem }}: {{ int $elemVal -}} + {{- else if kindIs "slice" $elemVal -}} + {{- nindent 4 $elem -}}: + {{- tpl (toYaml $elemVal ) $| nindent 4 -}} + {{- else -}} + {{- if $elemVal -}} + {{- nindent 4 $elem }}: {{ toYaml $elemVal -}} + {{- else -}} + {{- nindent 4 $elem }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $key, $value := index .Values "controllerConfig" -}} + {{- if kindIs "map" $value -}} + {{- nindent 4 $key -}}: + {{- range $elem, $elemVal := $value -}} + {{- if not (kindIs "map" $elemVal) -}} + {{- if kindIs "invalid" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- else if kindIs "string" $elemVal -}} + {{- nindent 6 $elem }}: {{ tpl $elemVal $ -}} + {{- else if kindIs "float64" $elemVal -}} + {{- nindent 6 $elem }}: {{ int $elemVal -}} + {{- else if kindIs "slice" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- tpl (toYaml $elemVal ) $| nindent 6 -}} + {{- else -}} + {{- nindent 6 $elem }}: {{ $elemVal -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $elem, $elemVal := $value -}} + {{- if kindIs "map" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- range $therrKey, $therrVal := $elemVal -}} + {{- if not (kindIs "map" $therrVal) -}} + {{- if kindIs "invalid" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- else if kindIs "string" $therrVal -}} + {{- if (tpl $therrVal $ ) -}} + {{- nindent 8 $therrKey }}: {{ tpl $therrVal $ -}} + {{- else -}} + {{- nindent 8 $therrKey }}: + {{- end -}} + {{- else if kindIs "float64" $therrVal -}} + {{- nindent 8 $therrKey }}: {{ int $therrVal -}} + {{- else if kindIs "slice" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- tpl (toYaml $therrVal ) $| nindent 8 -}} + {{- else -}} + {{- if $therrVal -}} + {{- nindent 8 $therrKey }}: {{ toYaml $therrVal -}} + {{- else -}} + {{- nindent 8 $therrKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $therrKey, $therrVal := $elemVal -}} + {{- if kindIs "map" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- range $fourKey, $fourVal := $therrVal -}} + {{- if not (kindIs "map" $fourVal) -}} + {{- if kindIs "invalid" $fourVal -}} + {{- nindent 10 
$fourKey -}}: + {{- else if kindIs "string" $fourVal -}} + {{- if (tpl $fourVal $ ) -}} + {{- nindent 10 $fourKey }}: {{ tpl $fourVal $ -}} + {{- else -}} + {{- nindent 10 $fourKey }}: + {{- end -}} + {{- else if kindIs "float64" $fourVal -}} + {{- nindent 10 $fourKey }}: {{ int $fourVal -}} + {{- else if kindIs "slice" $fourVal -}} + {{- nindent 10 $fourKey -}}: + {{- tpl (toYaml $fourVal ) $| nindent 10 -}} + {{- else -}} + {{- if $fourVal -}} + {{- nindent 10 $fourKey }}: {{ toYaml $fourVal -}} + {{- else -}} + {{- nindent 10 $fourKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $fourKey, $fourVal := $therrVal -}} + {{- if kindIs "map" $fourVal -}} + {{- nindent 10 $fourKey -}}: + {{- range $fiveKey, $fiveVal := $fourVal -}} + {{- if not (kindIs "map" $fiveVal) -}} + {{- if kindIs "invalid" $fiveVal -}} + {{- nindent 12 $fiveKey -}}: + {{- else if kindIs "string" $fiveVal -}} + {{- if $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ tpl $fiveVal $ -}} + {{- else -}} + {{- nindent 12 $fiveKey }}: + {{- end -}} + {{- else if kindIs "float64" $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ int $fiveVal -}} + {{- else if kindIs "slice" $fiveVal -}} + {{- nindent 12 $fiveKey -}}: + {{- tpl (toYaml $fiveVal ) $| nindent 12 -}} + {{- else -}} + {{- if $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ $fiveVal -}} + {{- else -}} + {{- nindent 12 $fiveKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end }} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/templates/controller-deployment.yaml b/evaluation/eval-charts/evaluation-controller/templates/controller-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa1c06c1b414e7f7ae29023ddc89e4268caa8397 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/templates/controller-deployment.yaml @@ -0,0 +1,88 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "evaluation-controller.fullname" . }}-controller + labels: + {{- include "evaluation-controller.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "evaluation-controller.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/customConfig: {{ sha256sum (print (tpl (toYaml .Values.controllerConfig) $)) }} + labels: + {{- include "evaluation-controller.selectorLabels" . | nindent 8 }} + spec: + hostNetwork: {{ tpl .Values.hostNetwork . }} + dnsPolicy: {{ tpl .Values.dnsPolicy . }} + dnsConfig: + nameservers: + {{- with .Values.nameservers }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullSecrets: + {{- with .Values.imagePullSecrets }} + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: evaluation-controller + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ tpl .Values.image.repository . }}:{{ tpl .Values.image.tag . }}" + imagePullPolicy: "{{ tpl .Values.image.pullPolicy . 
}}" + ports: + - name: controller + containerPort: 10083 + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + command: + - "/bin/bash" + - "-c" + - | + source /root/venv/bin/activate && python3 -u /root/eval-controller/eval-controller.py + + volumeMounts: + - name: controller-config + mountPath: /etc/eval-controller.yaml + subPath: eval-controller.yaml + - name: controller-log-volume + mountPath: {{ .Values.controllerConfig.log_dir }} + - name: controller-data-volume + mountPath: {{ .Values.controllerConfig.runner_data_dir }} + volumes: + - name: controller-config + configMap: + name: {{ include "evaluation-controller.fullname" . }} + items: + - key: controllerConfig + path: eval-controller.yaml + - name: controller-log-volume + hostPath: + path: {{ .Values.controllerConfig.log_dir }} + type: DirectoryOrCreate + - name: controller-data-volume + hostPath: + path: {{ .Values.controllerConfig.runner_data_dir }} + type: DirectoryOrCreate + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-controller/templates/service.yaml b/evaluation/eval-charts/evaluation-controller/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c490efed7fa6437b94027caf4b10630093cad9d5 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/templates/service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "evaluation-controller.fullname" . }}-controller + labels: + {{- include "evaluation-controller.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + {{- $serviceType := .Values.service.type -}} + {{- range .Values.service.ports }} + - name: {{ tpl .name $ }} + port: {{ tpl (toString .port) $ }} + targetPort: {{ tpl (toString .targetPort) $ }} + {{- if and (eq $serviceType "NodePort") (.nodePort) }} + nodePort: {{ tpl (toString .nodePort) $ }} + {{- end }} + protocol: {{ tpl .protocol $ }} + {{- end }} + selector: + {{- include "evaluation-controller.selectorLabels" . | nindent 4 }} + type: "{{ .Values.service.type }}" + selector: + {{- include "evaluation-controller.selectorLabels" . | nindent 4 }} diff --git a/evaluation/eval-charts/evaluation-controller/values.yaml b/evaluation/eval-charts/evaluation-controller/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72e3b06a4974223fb133a697aa1f424b75930fb6 --- /dev/null +++ b/evaluation/eval-charts/evaluation-controller/values.yaml @@ -0,0 +1,116 @@ +global: + mysql: + password: "deepflow" + db: "evaluation" + redis: + password: "root" + db: "0" + allInOneLocalStorage: "false" + podAntiAffinityLabelSelector: [] + podAntiAffinityTermLabelSelector: [] + podAffinityLabelSelector: [] + podAffinityTermLabelSelector: [] + nodeAffinityLabelSelector: [] + nodeAffinityTermLabelSelector: [] + +image: + repository: hub.deepflow.yunshan.net/public/eval-controller + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +replicas: 1 + +hostNetwork: "false" +dnsPolicy: ClusterFirst +nameservers: +- 114.114.114.114 +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: NodePort + ports: + - name: tcp + port: 10083 + targetPort: 10083 + nodePort: + protocol: TCP + +controllerConfig: + listen_port: 10083 # HTTP Listen Port + log_dir: /var/log/evalutation # log dir + runner_data_dir: /var/evaluation # runner data dir + local_host_ip: "" + global_ssh_port: 22 + global_ssh_username: + global_ssh_password: + agent-tools: + deepflowce: + name: "Deepflow-Agent(CE)" + # deploy_type: k8s / workload + deploy_type: k8s + docking_platform: 0 + cloud_info: + vpc_name: + domain_name: + server_ip: + server_ssh_port: + server_ssh_username: + server_ssh_password: + version: latest + config: + max_cpus: 1 + max_memory: 1024 + fixed_host: + performance_analysis_traffic_ip: + performance_analysis_nginx_ip: + performance_analysis_istio_ip: + platform-tools: + type: + aliyun: + access_key: + secret_key: + region: + mysql: + host: "{{ $.Release.Name }}-mysql" + port: 30130 + user: root + password: "{{ $.Values.global.mysql.password }}" + db: "{{ $.Values.global.mysql.db }}" + redis: + host: "{{ $.Release.Name }}-redis" + port: 6379 + password: "{{ $.Values.global.redis.password }}" + db: "{{ $.Values.global.redis.db }}" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/evaluation/eval-charts/evaluation-runner/.helmignore b/evaluation/eval-charts/evaluation-runner/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..0e8a0eb36f4ca2c939201c0d54b5d82a1ea34778 --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/evaluation/eval-charts/evaluation-runner/Chart.yaml b/evaluation/eval-charts/evaluation-runner/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..428b3cca4c61588bda2dad6d43cce094d953af5b --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: evaluation-runner +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. 
They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/evaluation/eval-charts/evaluation-runner/templates/_helpers.tpl b/evaluation/eval-charts/evaluation-runner/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..77b2a68878a8dbfa8cbb77fc50c7e132520fc5b6 --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/templates/_helpers.tpl @@ -0,0 +1,50 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "evaluation-runner.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "evaluation-runner.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "evaluation-runner.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "evaluation-runner.labels" -}} +helm.sh/chart: {{ include "evaluation-runner.chart" . }} +{{ include "evaluation-runner.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "evaluation-runner.selectorLabels" -}} +app: evaluation +component: evaluation-runner +app.kubernetes.io/name: {{ include "evaluation-runner.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + diff --git a/evaluation/eval-charts/evaluation-runner/templates/configmap.yaml b/evaluation/eval-charts/evaluation-runner/templates/configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be9c9ee1453128263706ffcf8323795b905ae0f8 --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/templates/configmap.yaml @@ -0,0 +1,140 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ include "evaluation-runner.fullname" . }} + labels: + {{- include "evaluation-runner.labels" . 
| nindent 4 }} +data: + runnerConfig: | + {{- range $elem, $elemVal := index .Values "runnerConfig" -}} + {{- if not (kindIs "map" $elemVal) -}} + {{- if kindIs "invalid" $elemVal -}} + {{- nindent 4 $elem -}}: + {{- else if kindIs "string" $elemVal -}} + {{- if (tpl $elemVal $ ) -}} + {{- nindent 4 $elem }}: {{ tpl $elemVal $ -}} + {{- else -}} + {{- nindent 4 $elem }}: + {{- end -}} + {{- else if kindIs "float64" $elemVal -}} + {{- nindent 4 $elem }}: {{ int $elemVal -}} + {{- else if kindIs "slice" $elemVal -}} + {{- nindent 4 $elem -}}: + {{- tpl (toYaml $elemVal ) $| nindent 4 -}} + {{- else -}} + {{- if $elemVal -}} + {{- nindent 4 $elem }}: {{ toYaml $elemVal -}} + {{- else -}} + {{- nindent 4 $elem }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $key, $value := index .Values "runnerConfig" -}} + {{- if kindIs "map" $value -}} + {{- nindent 4 $key -}}: + {{- range $elem, $elemVal := $value -}} + {{- if not (kindIs "map" $elemVal) -}} + {{- if kindIs "invalid" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- else if kindIs "string" $elemVal -}} + {{- nindent 6 $elem }}: {{ tpl $elemVal $ -}} + {{- else if kindIs "float64" $elemVal -}} + {{- nindent 6 $elem }}: {{ int $elemVal -}} + {{- else if kindIs "slice" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- tpl (toYaml $elemVal ) $| nindent 6 -}} + {{- else -}} + {{- nindent 6 $elem }}: {{ $elemVal -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $elem, $elemVal := $value -}} + {{- if kindIs "map" $elemVal -}} + {{- nindent 6 $elem -}}: + {{- range $therrKey, $therrVal := $elemVal -}} + {{- if not (kindIs "map" $therrVal) -}} + {{- if kindIs "invalid" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- else if kindIs "string" $therrVal -}} + {{- if (tpl $therrVal $ ) -}} + {{- nindent 8 $therrKey }}: {{ tpl $therrVal $ -}} + {{- else -}} + {{- nindent 8 $therrKey }}: + {{- end -}} + {{- else if kindIs "float64" $therrVal -}} + {{- nindent 8 $therrKey }}: {{ int $therrVal -}} + {{- else if kindIs "slice" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- tpl (toYaml $therrVal ) $| nindent 8 -}} + {{- else -}} + {{- if $therrVal -}} + {{- nindent 8 $therrKey }}: {{ toYaml $therrVal -}} + {{- else -}} + {{- nindent 8 $therrKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $therrKey, $therrVal := $elemVal -}} + {{- if kindIs "map" $therrVal -}} + {{- nindent 8 $therrKey -}}: + {{- range $fourKey, $fourVal := $therrVal -}} + {{- if not (kindIs "map" $fourVal) -}} + {{- if kindIs "invalid" $fourVal -}} + {{- nindent 10 $fourKey -}}: + {{- else if kindIs "string" $fourVal -}} + {{- if (tpl $fourVal $ ) -}} + {{- nindent 10 $fourKey }}: {{ tpl $fourVal $ -}} + {{- else -}} + {{- nindent 10 $fourKey }}: + {{- end -}} + {{- else if kindIs "float64" $fourVal -}} + {{- nindent 10 $fourKey }}: {{ int $fourVal -}} + {{- else if kindIs "slice" $fourVal -}} + {{- nindent 10 $fourKey -}}: + {{- tpl (toYaml $fourVal ) $| nindent 10 -}} + {{- else -}} + {{- if $fourVal -}} + {{- nindent 10 $fourKey }}: {{ toYaml $fourVal -}} + {{- else -}} + {{- nindent 10 $fourKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- range $fourKey, $fourVal := $therrVal -}} + {{- if kindIs "map" $fourVal -}} + {{- nindent 10 $fourKey -}}: + {{- range $fiveKey, $fiveVal := $fourVal -}} + {{- if not (kindIs "map" $fiveVal) -}} + {{- if kindIs "invalid" $fiveVal -}} + {{- nindent 12 $fiveKey -}}: + {{- else if kindIs "string" $fiveVal -}} + {{- if $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ tpl $fiveVal $ 
-}} + {{- else -}} + {{- nindent 12 $fiveKey }}: + {{- end -}} + {{- else if kindIs "float64" $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ int $fiveVal -}} + {{- else if kindIs "slice" $fiveVal -}} + {{- nindent 12 $fiveKey -}}: + {{- tpl (toYaml $fiveVal ) $| nindent 12 -}} + {{- else -}} + {{- if $fiveVal -}} + {{- nindent 12 $fiveKey }}: {{ $fiveVal -}} + {{- else -}} + {{- nindent 12 $fiveKey }}: + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end }} \ No newline at end of file diff --git a/evaluation/eval-charts/evaluation-runner/templates/runner-job.yaml b/evaluation/eval-charts/evaluation-runner/templates/runner-job.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3351fe03b7bd59d2d8ea9e0a7a2b8098b8452c50 --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/templates/runner-job.yaml @@ -0,0 +1,69 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "evaluation-runner.fullname" . }} + labels: + {{- include "evaluation-runner.labels" . | nindent 4 }} +spec: + template: + metadata: + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/customConfig: {{ sha256sum (print (tpl (toYaml .Values.runnerConfig) $)) }} + labels: + {{- include "evaluation-runner.selectorLabels" . | nindent 8 }} + spec: + hostNetwork: {{ tpl .Values.hostNetwork . }} + dnsPolicy: {{ tpl .Values.dnsPolicy . }} + dnsConfig: + nameservers: + {{- with .Values.nameservers }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullSecrets: + {{- with .Values.imagePullSecrets }} + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + restartPolicy: {{ .Values.restartPolicy }} + containers: + - name: evaluation-runner + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ tpl .Values.image.repository . }}:{{ tpl .Values.image.tag . }}" + imagePullPolicy: "{{ tpl .Values.image.pullPolicy . }}" + resources: + {{- toYaml .Values.resources | nindent 12 }} + command: + - "/bin/bash" + - "-c" + - | + source /root/venv/bin/activate && python3 -u /root/eval-runner/eval-runner.py || tail -f /dev/null + volumeMounts: + - name: runner-config + mountPath: /etc/eval-runner.yaml + subPath: eval-runner.yaml + volumes: + - name: runner-config + configMap: + name: {{ include "evaluation-runner.fullname" . }} + items: + - key: runnerConfig + path: eval-runner.yaml + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + backoffLimit: 0 diff --git a/evaluation/eval-charts/evaluation-runner/values.yaml b/evaluation/eval-charts/evaluation-runner/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1aeea1e08441febe49c641650feea49af340b2e4 --- /dev/null +++ b/evaluation/eval-charts/evaluation-runner/values.yaml @@ -0,0 +1,97 @@ +replicas: 1 + +image: + repository: hub.deepflow.yunshan.net/public/eval-runner + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +hostNetwork: "false" +dnsPolicy: ClusterFirst +restartPolicy: Never +nameservers: +- 114.114.114.114 +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + + +runnerConfig: + runner_data_dir: + listen_port: + global_ssh_port: 22 + global_ssh_username: + global_ssh_password: + case_params: + uuid: + case_name: + process_num: 1 + agent-tools: + deepflowce: + name: "Deepflow-Agent(CE)" + # deploy_type: k8s / workload + deploy_type: k8s + docking_platform: 0 + cloud_info: + vpc_name: + domain_name: + server_ip: + server_ssh_port: 22 + server_ssh_username: + server_ssh_password: + version: latest + config: + max_cpus: 1 + max_memory: 1024 + platform-tools: + type: + aliyun: + access_key: + secret_key: + region: + fixed_host: + performance_analysis_traffic_ip: + performance_analysis_nginx_ip: + performance_analysis_istio_ip: + mysql: + host: + port: + user: + password: + db: + redis: + host: + port: + password: + db: +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/evaluation/eval-controller/Dockerfile b/evaluation/eval-controller/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ae92aad8fb044ac3644357e612007e34ff82a8fa --- /dev/null +++ b/evaluation/eval-controller/Dockerfile @@ -0,0 +1,51 @@ +# evaluation +# +# VERSION 1.0.0 + +# 构建层 +FROM hub.deepflow.yunshan.net/dev/python:3.8.19-slim-bullseye AS builder +RUN apt-get update && \ + apt-get install --no-install-suggests \ + --no-install-recommends --yes \ + python3-venv=3.9.2-3 \ + build-essential=12.9 \ + gcc=4:10.2.1-1 \ + libpython3-dev=3.9.2-3 \ + libpq-dev=13.14-0+deb11u1 \ + default-libmysqlclient-dev=1.0.7 \ + libmariadb-dev=1:10.5.23-0+deb11u1 \ + libmariadb3=1:10.5.23-0+deb11u1 \ + make=4.3-4.1 \ + && \ + python3 -m venv /root/venv && \ + /root/venv/bin/pip install -U pip==24.0 +# 安装 Base pip 依赖包 +FROM builder AS builder-venv-base +RUN /root/venv/bin/pip install cffi==1.16.0 --trusted-host mirrors.aliyun.com --index-url https://mirrors.aliyun.com/pypi/simple/ +# 安装自定义 pip 依赖包 +FROM builder-venv-base AS builder-venv-custom +COPY requirements.txt /root/requirements.txt +RUN /root/venv/bin/pip install --disable-pip-version-check \ + --no-cache-dir \ + --trusted-host mirrors.aliyun.com \ + --index-url https://mirrors.aliyun.com/pypi/simple/ \ + -r /root/requirements.txt + +# 最终运行层 +FROM hub.deepflow.yunshan.net/dev/python:3.8.19-slim-bullseye AS runner +ARG TARGETARCH +RUN --mount=type=bind,target=/temp,from=builder-venv-custom,source=/ \ + BUILD_ARCH=$(echo ${TARGETARCH}|sed 's|amd64|x86_64|'|sed 's|arm64|aarch64|') && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb.* /usr/lib/${BUILD_ARCH}-linux-gnu/ && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb3/ /usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb3/ && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmysql* 
/usr/lib/${BUILD_ARCH}-linux-gnu/ && \ + cp -raf /temp/root/venv /root/venv && \ + apt-get update && \ + apt-get install --no-install-suggests \ + --no-install-recommends --yes \ + vim-tiny=2:8.2.2434-3+deb11u1 curl=7.74.0-1.3+deb11u11 +# 复制代码 +COPY ./etc/eval-controller.yaml /etc/ +COPY ./eval-controller/ /root/eval-controller/ +# Run +CMD /root/venv/bin/python3 -u /root/eval-controller/eval-controller.py \ No newline at end of file diff --git a/evaluation/eval-controller/README.md b/evaluation/eval-controller/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8f1f32e5829c67c151d040a9f0d022492ce5bbec --- /dev/null +++ b/evaluation/eval-controller/README.md @@ -0,0 +1,458 @@ +# Api Documentation + +## Resource: auto-test + +### POST /v1/evaluation/auto-test + +#### args +type: json +| name | type | required | description | +|------|------|----------|-------------| +| case_name | string | true | 测试例名称 | +| process_num | int | false | 测试例并发数 | +| agent_type | string | true | 采集器类型 | + +ps: + - case_name的值需要从api `/v1/evaluation/dictionary/case`获取 + - process_num暂不支持指定,默认1 + - agent_type的值需要从api `/v1/evaluation/dictionary/agent_type`获取 + +example: +``` +request: + +curl -XPOST "http://127.0.0.1:10083/v1/evaluation/auto-test" -H "Content-Type: application/json" -d '{"case_name":"performance_analysis_nginx_http","process_num":1,"agent_type":"deepflowce"}' + +reponse: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "CaseRecord", + "DATA": [ + ... + ] +} +``` + +### GET /v1/evaluation/auto-test + +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +| uuid | string | flase | 测试例uuid | +| page_size | int | false | 分页内容数量 | +| page_index | int | false | 页数 | + +ps: + - uuid不填时获取所有测试例 + +#### response +type: json +``` +"DATA": { + "UUID": case_uuid, + "NAME": name, + "CASE_NAME": case_name, + "STATUS": case状态,具体含义调用api `/v1/evaluation/dictionary/case_status`获取 + "CREATED_AT": "2024-05-11 14:38:01" 创建时间 + "AGENT_TYPE": "Deepflow-Agent(CE)" +} +"PAGE": { + "INDEX": 2, # 当前页数 + "SIZE": 2, # 每页内容数量 + "TOTAL": 5, # 总页数 + "TOTAL_ITEM": 8 # 总内容数量 +} +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/auto-test?uuid=be3fb069-b69a-4df6-b513-3c7cf24eb991&page_index=2&page_size=2" + +reponse: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "CaseRecord", + "DATA": [ + { + "ID": 6, + "UUID": "be3fb069-b69a-4df6-b513-3c7cf24eb991", + "NAME": "performance_analysis_nginx_http-be3fb069" + "CASE_NAME": "performance_analysis_nginx_http", + "CASE_PARAMS": "", + "USER": null, + "RUNNER_COMMIT_ID": null, + "RUNNER_IMAGE_TAG": null, + "STATUS": 11, + "DELETED": 0, + "CREATED_AT": "2024-05-11 14:38:01" + }, + ... 
+ ], + "PAGE": { + "INDEX": 2, + "SIZE": 2, + "TOTAL": 5, + "TOTAL_ITEM": 8 + } +} +``` + +### PATCH /v1/evaluation/auto-test + +#### args +type: json +| name | type | required | description | +|------|------|----------|-------------| +| uuids | []string | true | 测试例uuid列表 | +| status | int | true | 需要修改的状态 | + +ps: + - status支持的值需调用api `/v1/evaluation/dictionary/case_status_support_update`获取, 2.暂停 3.取消 4.恢复 + +example: +``` +request: + +curl -XPATCH "http://127.0.0.1:10083/v1/evaluation/auto-test" -H "Content-Type: application/json" -d '{"uuids":["be3fb069-b69a-4df6-b513-3c7cf24eb991"],"status":2}' + +reponse: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "CaseRecord", + "DATA": [ + ... + ] +} +``` + +### DELETE /v1/evaluation/auto-test + +#### args +type: json +| name | type | required | description | +|------|------|----------|-------------| +| uuids | []string | true | 测试例uuid | + +``` +request: + +curl -XDELETE "http://127.0.0.1:10083/v1/evaluation/auto-test" -H "Content-Type: application/json" -d '{"uuids":["be3fb069-b69a-4df6-b513-3c7cf24eb991"]}' + +reponse: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "CaseRecord", + "DATA": [ + ... + ] +} +``` + +## Resource: result + +### GET /v1/evaluation/result/log +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +| uuid | string | true | 测试例uuid | +| type | int | true | 输出类型 | +| line_index | int | false | 指定行数位置开始读取,不传则从1开始 | +| line_size | int | false | 读取行数,不传则读取全部 | + +ps: + - type目前支持传 1.raw_log + +#### reponse + +``` +{ + "uuid" : + "logs" : [ + "", + "", + "" + ], + "line_index": 开始读取的行数 + "line_size": 返回的行数 + "line_count": 当前日志总行数 +} +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/result/log?uuid=test213&type=1&line_index=2&line_size=3" + +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "dict", + "DATA": { + "uuid": "test213", + "logs": [ + "test_log 1", + "test_log 2" + ], + "line_index": 2, # 开始读取的行数 + "line_size": 2, # 返回的行数 + "line_count": 3 # 总行数 + } +} +``` + +### GET /v1/evaluation/result/performance +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +| uuid | string | true | 测试例uuid | +| type | int | true | 输出类型 | + +ps: + - type目前支持传 2.md + +#### response: +type: json +``` +[ + ["filename1", "md string"], + ["filename2", "md string"], + ... +] +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/result/performance?uuid=4a4eb371-6a40-420b-bd42-b6fd30c1f5b7&type=2" |jq . + + +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "list", + "DATA": [ + [ + "24051016-0553a94-agnet-perfromance-report_performance_protocol.md", + "# 开源采集器性能测试报告 - latest\n## 摘要\n本文档为开源采集器(deepflow-agent)的性能测试报告,将评估 Agent 在不同应用协议下的流量,分析其自身资源消耗。测试版本为 latest,测试完成时间为2024-05-10 16:34:00。\n\n## 测试环境\n环境信息:... + ] + ] +} +``` + +## Resource: dictionary +### GET /v1/evaluation/dictionary/case +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| + +#### response: +type: json +``` +[ + ["casename1", "case_path1", "case_description1"], + ["测试例名称", "测试例路径", "测试例描述"], + ... +] +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/dictionary/case" |jq . 
+ +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "list", + "DATA": [ + [ + "performance_analysis_nginx_http", + "performance_analysis/test_performance_analysis_nginx_http.py", + "性能分析-极端高性能场景(nginx)" + ] + ] +} + +``` + +### GET /v1/evaluation/dictionary/case_status_support_update +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +#### response: +type: json +``` +[ + [修改的case状态,状态名称,可供修改的状态列表] +] +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/dictionary/case_status_support_update" |jq . + + +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "list", + "DATA": [ + [2, "pause", [1]], # 只能在状态为1时修改状态为2 + [3, "cancel", [1, 2]], 只能在状态为1或2时修改状态为3 + [4, "resume", [2]] + ] +} + +``` + +### GET /v1/evaluation/dictionary/case_status +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +#### response: +type: json +``` +[ + [状态int, 状态名称], + ... +] +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/dictionary/case_status" |jq . + +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "list", + "DATA": [ + [ + 0, + "Init" + ], + [ + 11, + "Starting" + ], + [ + 1, + "Running" + ], + [ + 12, + "Pending" + ], + [ + 2, + "Paused" + ], + [ + 21, + "Pausing" + ], + [ + 3, + "Finished" + ], + [ + 31, + "Stopping" + ], + [ + 4, + "Error" + ] + ] +} + +``` + +### GET /v1/evaluation/dictionary/agent_type +#### args +type: params +| name | type | required | description | +|------|------|----------|-------------| +#### response: +type: json +``` +[ + [采集器类型string, 采集器类型名称], + ... +] +``` + +example: +``` +request: + +curl -XGET "http://127.0.0.1:10083/v1/evaluation/dictionary/agent_type" |jq . 
+ +response: + +{ + "OPT_STATUS": "SUCCESS", + "WAIT_CALLBACK": false, + "TASK": null, + "DESCRIPTION": "", + "TYPE": "list", + "DATA": [ + [ + "deepflowce", # 用于下发 + "Deepflow-Agent(CE)" #用于展示 + ] + ] +} + +``` \ No newline at end of file diff --git a/evaluation/eval-controller/etc/eval-controller.yaml b/evaluation/eval-controller/etc/eval-controller.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31f71354849a72a13780a3861ebb0d36ec142751 --- /dev/null +++ b/evaluation/eval-controller/etc/eval-controller.yaml @@ -0,0 +1,53 @@ +listen_port: 10083 # HTTP Listen Port +runner_data_dir: /var/evaluation # runner data dir +log_dir: /var/log/evalutation # log dir +local_host_ip: +global_ssh_port: 22 +global_ssh_username: +global_ssh_password: +max_runner_num: 3 + +agent-tools: + deepflowce: + name: Deepflow-Agent(CE) + # deploy_type: k8s / workload + deploy_type: k8s + + # deepflow-server 是否对接了云平台 + docking_platform: 0 + cloud_info: + vpc_name: + domain_name: + server_ip: + server_ssh_port: + server_ssh_username: + server_ssh_password: + version: + config: + max_cpus: 1 + max_memory: 1024 + +platform-tools: + type: aliyun + aliyun: + access_key: + secret_key: + region: + +fixed_host: + performance_analysis_traffic_ip: + performance_analysis_nginx_ip: + performance_analysis_istio_ip: + +mysql: + host: + port: + user: + password: + db: + +redis: + host: + port: + password: + db: \ No newline at end of file diff --git a/evaluation/eval-controller/eval-controller/common/const.py b/evaluation/eval-controller/eval-controller/common/const.py new file mode 100644 index 0000000000000000000000000000000000000000..3693135ec6bf1fbce3f6dbd16645c14b7b165d0b --- /dev/null +++ b/evaluation/eval-controller/eval-controller/common/const.py @@ -0,0 +1,8 @@ +CONTROLLER_CONFIG_PATH = "/etc/eval-controller.yaml" +API_PREFIX = "/v1/evaluation" + + +POD_MAX_ABNORMAL_STATUS_NUMBER = 10 +WAIT_MYSQL_RUNNING_TIMEOUT = 600 + +ALLURE_SERVER = "http://10.1.19.19:20080" diff --git a/evaluation/eval-controller/eval-controller/common/model.py b/evaluation/eval-controller/eval-controller/common/model.py new file mode 100644 index 0000000000000000000000000000000000000000..3e09116949af9c2956b25db424936416edfd4866 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/common/model.py @@ -0,0 +1,126 @@ +import uuid +from eval_lib.common.exceptions import BadRequestException +from eval_lib.model.base import BaseStruct +from eval_lib.model.const import CASE_PARAMS_STATUS_CREATE, CASE_PARAMS_STATUS_PAUSE, CASE_PARAMS_STATUS_CANCEL, CASE_PARAMS_STATUS_RESUME, CASE_PARAMS_STATUS_FROCE_END +from config import conf + +RUNNER_IMAGE_TAG_DEFAULT_LATEST = "latest" +AGENT_TYPE_DEFAULT_DEEPFLOWCE = "deepflowce" + + +class AutoTestCreate(BaseStruct): + + KEYS = [ + "uuid", "case_name", "process_num", "runner_image_tag", "agent_type" + ] + + def init(self, **kwargs): + super().init(**kwargs) + self.uuid = str(uuid.uuid4()) + self.runner_image_tag = RUNNER_IMAGE_TAG_DEFAULT_LATEST if not self.runner_image_tag else self.runner_image_tag + self.agent_type = AGENT_TYPE_DEFAULT_DEEPFLOWCE if not self.agent_type else self.agent_type + + def is_valid(self): + # TODO + if not (self.uuid): + raise BadRequestException("bad request") + if self.agent_type not in conf.agent_tools: + raise BadRequestException( + f"bad request agent_type {self.agent_type}" + ) + + +class AutoTestUpdate(BaseStruct): + + KEYS = ["uuids", "status"] + + def is_valid(self): + # TODO + if not self.uuids: + raise BadRequestException("bad request no uuids") 
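+        # The allowed values are the fixed status set checked below; per the API doc
+        # (GET /v1/evaluation/dictionary/case_status_support_update): 2 = pause, 3 = cancel, 4 = resume.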
+ if self.status is not None: + if self.status not in [ + CASE_PARAMS_STATUS_CREATE, + CASE_PARAMS_STATUS_PAUSE, + CASE_PARAMS_STATUS_CANCEL, + CASE_PARAMS_STATUS_RESUME, + CASE_PARAMS_STATUS_FROCE_END, + ]: + raise BadRequestException(f"bad request status {self.status}") + + +class AutoTestDelete(BaseStruct): + + KEYS = ["uuids"] + + def is_valid(self): + # TODO + if not self.uuids: + raise BadRequestException("bad request") + + +class AutoTestFilter(BaseStruct): + + KEYS = ["uuid", "uuids", "status", "page_size", "page_index"] + + def init(self, **kwargs): + super().init(**kwargs) + self.page_size = int(self.page_size) if self.page_size else None + self.page_index = int(self.page_index) if self.page_index else None + + +class ResultPostLog(BaseStruct): + + KEYS = ["uuid", "type", "data"] + + def is_valid(self): + # TODO + if not self.uuid or self.type is None: + raise BadRequestException("bad request") + return True + + +class ResultGetLog(BaseStruct): + + KEYS = ["uuid", "type", "line_index", "line_size"] + + def init(self, **kwargs): + super().init(**kwargs) + self.type = int(self.type) if self.type is not None else self.type + if not self.line_index: + self.line_index = 1 + if not self.line_size: + self.line_size = 0 + self.line_index = int(self.line_index) + self.line_size = int(self.line_size) + + def is_valid(self): + # TODO + if not self.uuid or self.type is None: + raise BadRequestException("bad request") + return True + + +class ResultLogResponse(BaseStruct): + + KEYS = ["uuid", "logs", "line_index", "line_size", "line_count"] + + +class ResultGetFile(BaseStruct): + + KEYS = ["uuid", "type"] + + def init(self, **kwargs): + super().init(**kwargs) + self.type = int(self.type) if self.type is not None else self.type + + def is_valid(self): + # TODO + if not self.uuid or self.type is None: + raise BadRequestException("bad request") + return True + + +class ResultFileResponse(BaseStruct): + + KEYS = ["uuid", "files"] diff --git a/evaluation/eval-controller/eval-controller/common/mysql.py b/evaluation/eval-controller/eval-controller/common/mysql.py new file mode 100644 index 0000000000000000000000000000000000000000..9c34e7032e5e0d594ccb90c5f99730d3c5b570eb --- /dev/null +++ b/evaluation/eval-controller/eval-controller/common/mysql.py @@ -0,0 +1,44 @@ +import time +from . 
import const +from eval_lib.databases.mysql.db import db +from eval_lib.common.logger import get_logger +from eval_lib.databases.mysql.models.models import CaseRecord, CaseReport, Component + +log = get_logger() + + +def init_mysql(): + """ + 初始化MySQL数据库。 + + """ + start_time = time.time() + while True: + try: + db.connect() + db.create_tables([CaseRecord, CaseReport, Component]) + break # 如果成功连接并创建表,则退出循环 + except Exception as e: + if time.time() - start_time > const.WAIT_MYSQL_RUNNING_TIMEOUT: + log.error("MySQL deployment timed out") + raise TimeoutError("MySQL deployment timed out") + log.error(f"init mysql failed: {e}") + time.sleep(20) # 等待 20 秒后重试连接 + + +def update_case_record(uuid: str, **kwargs): + """ + 更新特定测试记录的信息。 + + 参数: + - uuid: str,要更新的案例记录的唯一标识符。 + - **kwargs: 额外的关键字参数,代表要更新的字段及其新值。 + """ + try: + # 根据UUID更新案例记录中的信息 + CaseRecord.update(**kwargs).where(CaseRecord.uuid == uuid).execute() + log.info(f"update case record {uuid} {kwargs} success") + except Exception as e: + # 记录更新失败的日志并抛出异常 + log.error(f"update case record: {uuid} , {kwargs}, failed: {e}") + # raise e diff --git a/evaluation/eval-controller/eval-controller/common/utils.py b/evaluation/eval-controller/eval-controller/common/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..022f5d435ac61dc7aaecb93cfb9fc14cb6888b2a --- /dev/null +++ b/evaluation/eval-controller/eval-controller/common/utils.py @@ -0,0 +1,150 @@ +import traceback +import json + +from config import conf +from datetime import date +from functools import wraps +from eval_lib.common.ssh import SSHPool +from eval_lib.common import logger +from eval_lib.common.exceptions import BadRequestException, InternalServerErrorException +from eval_lib.databases.mysql.models.base import BaseModel + +log = logger.get_logger() + +ssh_pool_default = SSHPool( + conf.global_ssh_port, + conf.global_ssh_username, + conf.global_ssh_password, +) + + +class Paginator: + + def __init__(self, model=None, page_index=None, page_size=None): + self.model = model + self.page_index = page_index + self.page_size = page_size + self.total_item = self.get_total() + if not page_size or not page_index or not self.total_item: + self.limit = None + self.offset = None + self.total_index = None + else: + self.limit = page_size + self.offset = (page_index - 1) * page_size + self.total_index = (self.total_item // self.page_size) + 1 + + def get_total(self): + if self.model: + return self.model.count() + else: + return None + + def to_json(self): + return { + "INDEX": self.page_index, + "SIZE": self.page_size, + "TOTAL": self.total_index, + "TOTAL_ITEM": self.total_item, + } + + +def exception_decorate(function): + + @wraps(function) + def wrapper(*args, **kwargs): + try: + return function(*args, **kwargs) + + except BadRequestException as e: + log.error(e) + return json_response( + status=e.status, description=str(e), wait_callback=False + ), 400 + + except InternalServerErrorException as e: + log.error(traceback.format_exc()) + return json_response( + status=e.status, description=str(e), wait_callback=False + ), 500 + + except Exception as e: + log.error(traceback.format_exc()) + return json_response( + status="SERVER_ERROR", description=str(e), wait_callback=False + ), 500 + + return wrapper + + +class EvalEncoder(json.JSONEncoder): + + def default(self, obj): + if isinstance(obj, BaseModel): + return obj.to_json() + elif isinstance(obj, date): + return obj.strftime("%Y-%m-%d %H:%M:%S") + else: + return super(EvalEncoder, self).default(obj) + + +def json_response( + 
status="SUCCESS", description=None, data=None, type=None, + wait_callback=False, task=None, page=None, flag=None, error_message=None +): + '''Generate json data for API response + + :param status: response status, HTTP status or specific status + :param description: + :param data: resource data + :types data: list or dict + :param type: + :param wait_callback: task synchronization or asynchronous + :param task: + :param page: + :param flag: + :return: + ''' + if task is not None: + wait_callback = True + data = dict_response( + status, description, data, type, wait_callback, task, page, flag, + error_message + ) + + return EvalEncoder().encode(data) + + +def dict_response( + status="SUCCESS", description=None, data=None, type=None, + wait_callback=False, task=None, page=None, flag=None, error_message=None +): + if description is None: + description = '' + if task is not None: + wait_callback = True + info = { + 'OPT_STATUS': status, + 'WAIT_CALLBACK': wait_callback, + 'TASK': task, + 'DESCRIPTION': description + } + if type is None and data is not None: + if isinstance(data, list): + if data: + type = data[0].__class__.__name__ + else: + type = None + else: + type = data.__class__.__name__ + if type is not None: + info['TYPE'] = type + if data is not None: + info['DATA'] = data + if page is not None: + info['PAGE'] = page + if flag is not None: + info['FLAG'] = flag + if error_message is not None: + info['ERROR_MESSAGE'] = error_message + return info diff --git a/evaluation/eval-controller/eval-controller/config.py b/evaluation/eval-controller/eval-controller/config.py new file mode 100644 index 0000000000000000000000000000000000000000..ec10ca377823afd45c12c83d40b104b4b93d01ff --- /dev/null +++ b/evaluation/eval-controller/eval-controller/config.py @@ -0,0 +1,74 @@ +import yaml +from common.const import CONTROLLER_CONFIG_PATH +from eval_lib.source.dictonary import Dictionary +import sys + + +class EvaluationConf(): + + def __init__(self): + self.listen_port = None + self.log_dir = None + self.runner_data_dir = None + self.local_host_ip = None + self.max_runner_num = None + self.agent_tools = {} + self.platform_tools = {} + self.parse() + + def parse(self): + try: + with open(CONTROLLER_CONFIG_PATH, 'r') as y: + yml = yaml.safe_load(y) + self.listen_port = yml.get('listen_port', 10083) + self.local_host_ip = yml.get('local_host_ip', "127.0.0.1") + self.log_dir = yml.get('log_dir', "/var/log/evaluation") + self.runner_data_dir = yml.get( + 'runner_data_dir', "/var/evaluation" + ) + self.max_runner_num = yml.get('max_runner_num', 10) + self.global_ssh_port = yml.get('global_ssh_port', 22) + self.global_ssh_username = yml.get('global_ssh_username', "") + self.global_ssh_password = yml.get('global_ssh_password', "") + self.fixed_host = yml.get('fixed_host', "") + self.parse_agent_tools(yml) + self.parse_platform_tools(yml) + self.parse_mysql(yml) + self.parse_redis(yml) + except Exception as e: + print("Yaml parser Error: %s" % e) + sys.exit(1) + + def parse_agent_tools(self, yml): + self.agent_tools = yml.get("agent-tools", {}) + # 更新dictionary AGNET_TYPE_DICTIONARY + Dictionary.update( + "agent_type", { + key: [self.agent_tools[key].get("name")] + for key in self.agent_tools.keys() + } + ) + + def parse_platform_tools(self, yml): + self.platform_tools = yml.get("platform-tools", {}) + + def parse_mysql(self, yml): + self.mysql = yml.get("mysql", {}) + self.mysql_host = self.mysql.get("host", "127.0.0.1") + self.mysql_port = self.mysql.get("port", 3306) + self.mysql_user = 
self.mysql.get("user", "root") + self.mysql_password = self.mysql.get("password", "deepflow") + self.mysql_db = self.mysql.get("db", "evaluation") + + def parse_redis(self, yml): + self.redis = yml.get("redis", {}) + self.redis_host = self.redis.get("host", "127.0.0.1") + self.redis_port = self.redis.get("port", 6379) + self.redis_password = self.redis.get("password", "root") + self.redis_db = self.redis.get("db", "0") + + def is_valid(self): + return self.listen_port and self.log_dir and self.runner_data_dir + + +conf = EvaluationConf() diff --git a/evaluation/eval-controller/eval-controller/eval-controller.py b/evaluation/eval-controller/eval-controller/eval-controller.py new file mode 100644 index 0000000000000000000000000000000000000000..06c741bc5fe3f6604cb14c018836fc74960cadc4 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/eval-controller.py @@ -0,0 +1,35 @@ +import sys +import multiprocessing + +from eval_lib.common.logger import LoggerManager +from manager.manager import Manager +from server.server import ServerProcess +from common.mysql import init_mysql +from config import conf + +# 主程序入口 +if __name__ == '__main__': + # 检查配置文件的有效性 + if not conf.is_valid(): + print('Invalid conf value, error exit.') + sys.exit(1) + # 初始化日志管理器,设置日志文件路径 + LoggerManager(log_file=f"{conf.log_dir}/evaluation.log") + + # 初始化MySQL连接 + init_mysql() + + # 使用多进程管理器创建一个消息队列 + # httpServer写入消息,manager读取消息 + m = multiprocessing.Manager() + message_queue = m.Queue() + + # 初始化并启动runner管理进程 + manager = Manager(message_queue) + manager.start() + + # 初始化并启动服务进程 + server = ServerProcess(queue=message_queue) + server.start() + # 等待服务进程执行完毕 + server.join() diff --git a/evaluation/eval-controller/eval-controller/eval_lib b/evaluation/eval-controller/eval-controller/eval_lib new file mode 120000 index 0000000000000000000000000000000000000000..ccd87bf41b8a393a87af84e50d361366dcb59b77 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/eval_lib @@ -0,0 +1 @@ +../../eval-lib \ No newline at end of file diff --git a/evaluation/eval-controller/eval-controller/manager/manager.py b/evaluation/eval-controller/eval-controller/manager/manager.py new file mode 100644 index 0000000000000000000000000000000000000000..57c82574ca779a0ebb1e5b7644f9b99e09f69e5c --- /dev/null +++ b/evaluation/eval-controller/eval-controller/manager/manager.py @@ -0,0 +1,139 @@ +import multiprocessing +import threading +import time +import threading +import traceback +import sys +import os +import traceback + +from typing import List +from multiprocessing import Process + +from eval_lib.model.base import CaseParams +from eval_lib.databases.mysql.models.models import CaseRecord +from eval_lib.databases.mysql.db import db +from eval_lib.databases.mysql import const as db_const +from manager.runner import Runner +from eval_lib.common.logger import get_logger +from eval_lib.model import const as model_const +from config import conf + +log = get_logger() +RUNNER_TIMEOUT = 60 * 60 + + +class Manager(Process): + + def __init__(self, q): + super().__init__() + self.message_queue: multiprocessing.Queue = q + self.runner_queue: List[Runner] = [] + self.runner_queue_lock = threading.Lock() + self.init() + + def init(self): + try: + main_file_path = os.path.abspath(sys.modules['__main__'].__file__) + os.chdir(os.path.dirname(main_file_path)) + with db.connection_context(): + CaseRecord.update( + status=db_const.CASE_RECORD_STATUS_EXCEPTION + ).where( + CaseRecord.status.not_in([ + db_const.CASE_RECORD_STATUS_FINISHED, + 
db_const.CASE_RECORD_STATUS_ERROR, + db_const.CASE_RECORD_STATUS_EXCEPTION + ]) + ).execute() + except Exception as e: + log.error(e) + + def run(self): + monitor_t = threading.Thread(target=self.monitor_runner_queue) + monitor_t.start() + while True: + try: + message = self.message_queue.get() + log.info(f"get message {vars(message)}") + if message.status == model_const.CASE_PARAMS_STATUS_CREATE: + log.info(f"insert test queue: {vars(message)}") + self.insert(message) + elif message.status == model_const.CASE_PARAMS_STATUS_PAUSE: + log.info(f"pause test queue: {vars(message)}") + self.pause(message) + elif message.status == model_const.CASE_PARAMS_STATUS_CANCEL: + log.info(f"cancel test queue: {vars(message)}") + self.cancel(message) + elif message.status == model_const.CASE_PARAMS_STATUS_RESUME: + log.info(f"resume test queue: {vars(message)}") + self.resume(message) + elif message.status == model_const.CASE_PARAMS_STATUS_FROCE_END: + log.info(f"force end test queue: {vars(message)}") + self.force_end(message) + except Exception as e: + log.error(e) + log.error(traceback.print_exc()) + time.sleep(5) + + def insert(self, params: CaseParams): + r = Runner(params) + with self.runner_queue_lock: + self.runner_queue.append(r) + r.start() + + def pause(self, params: CaseParams): + with self.runner_queue_lock: + for runner in self.runner_queue: + if runner.uuid == params.uuid: + runner.signal(runner.pause) + break + else: + log.error("pause: not found runner") + + def cancel(self, params: CaseParams): + with self.runner_queue_lock: + for runner in self.runner_queue: + if runner.uuid == params.uuid: + runner.signal(runner.cancel) + break + else: + log.error("cancel: not found runner") + + def resume(self, params: CaseParams): + with self.runner_queue_lock: + for runner in self.runner_queue: + if runner.uuid == params.uuid: + runner.signal(runner.resume) + break + else: + log.error("resume: not found runner") + + def force_end(self, params: CaseParams): + with self.runner_queue_lock: + for runner in self.runner_queue: + if runner.uuid == params.uuid: + runner.force_end() + break + + def monitor_runner_queue(self): + while True: + try: + with self.runner_queue_lock: + for r in self.runner_queue: + if not r.is_alive(): + pass + elif r.timeout(RUNNER_TIMEOUT): + r.cancel() + else: + continue + # TODO:收集结果 + + # 移除runner + r.remove_env() + self.runner_queue.remove(r) + time.sleep(3) + except Exception as e: + log.error(traceback.format_exc()) + log.error(e) + time.sleep(3) diff --git a/evaluation/eval-controller/eval-controller/manager/runner.py b/evaluation/eval-controller/eval-controller/manager/runner.py new file mode 100644 index 0000000000000000000000000000000000000000..4114c00b5a588307e86c10327f87ba8adc75f959 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/manager/runner.py @@ -0,0 +1,333 @@ +import datetime +import threading +import time +import requests +import yaml +from config import conf +from common.const import POD_MAX_ABNORMAL_STATUS_NUMBER, ALLURE_SERVER +from eval_lib.databases.redis.runner_info import RedisRunnerInfo +from common.utils import ssh_pool_default +from eval_lib.common.logger import get_logger +from eval_lib.model.base import CaseParams +from eval_lib.databases.mysql.db import db +from eval_lib.databases.mysql import const as db_const +from eval_lib.databases.redis import const as redis_const +from common.mysql import update_case_record +from report.report import ReportManager +import os + +log = get_logger() + + +class Runner(threading.Thread): + + def 
__init__(self, params: CaseParams): + super().__init__() + self.case_params = params + self.uuid = params.uuid + self.image_tag = params.runner_image_tag + self.start_time = int(time.time()) + self.redis_db = RedisRunnerInfo( + host=conf.redis_host, port=conf.redis_port, + password=conf.redis_password, db=conf.redis_db, max_connections=10 + ) + self.local_host_ip = conf.local_host_ip + self.runner_data_path = f"{conf.runner_data_dir}/runner-{self.uuid}" + self.release_name = f"runner-{self.uuid[:8]}" + self.callback = None + self.signal_lock = threading.Lock() + + def signal(self, input=None): + """ + 处理信号回调函数的设置和获取。 + + :param input: 指定一个新的回调函数,如果提供,则替换当前的回调函数。 + :type input: function + :return: 如果设置了回调函数且此次调用未提供新的回调函数,则返回当前的回调函数;否则返回None。 + """ + with self.signal_lock: # 确保设置或获取回调函数时的线程安全 + if input is not None: # 设置新的回调函数 + self.callback = input + return None + else: # 获取当前的回调函数 + return self.callback + + def run(self): + try: + with db.connection_context(): + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_STARTING + ) + self.create_data_dir() + self.exec_env() + + self.wait() + + with db.connection_context(): + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_STOPPING + ) + self.get_results() + + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_FINISHED + ) + except Exception as e: + log.error(f"runner error: {e}") + with db.connection_context(): + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_ERROR + ) + + def exec_env(self): + # TODO: leyi 创建pod, 写入redis + runner_yaml_path = f"{self.runner_data_path}/{self.release_name}.yaml" + self.create_runner_yaml_file(runner_yaml_path) + cmds = [ + "sudo helm repo update evaluation", + f"sudo helm install {self.release_name} evaluation/evaluation-runner -n evaluation --create-namespace -f {runner_yaml_path}", + ] + ssh_client = ssh_pool_default.get(self.local_host_ip) + try: + for cmd in cmds: + _, stdout, stderr = ssh_client.exec_command(cmd) + output = stdout.read().decode() + error = stderr.read().decode() + if error: + log.error(f"exec cmd {cmd} error: {error}") + return + log.info(f"exec cmd {cmd} output: {output}") + except Exception as e: + log.error(f"exec_env: error: {e}") + # redis 添加信息 + self.redis_db.init_runner_info(uuid=self.uuid) + time.sleep(10) + + def check_runner_pod_running(self): + command = f"sudo kubectl get pod -n evaluation |grep {self.release_name}-evaluation-runner " + ssh_client = ssh_pool_default.get(self.local_host_ip) + _, stdout, _ = ssh_client.exec_command(command) + output = stdout.read().decode() + if "Running" in output: + return True + else: + return False + + def check_runner_pod_completed(self): + runner_info = self.redis_db.get_runner_info(uuid=self.uuid) + if runner_info["runner-status"] == redis_const.CASE_STATUS_COMPLETED: + return True + else: + return False + + def wait(self): + """ + 等待测试用例执行完成。 + 此函数会周期性地检查 Runner Pod 的状态,直到 Pod 运行完成或达到最大异常状态次数。 + 如果检测到 Runner Pod 完成运行,则会记录执行状态并返回。 + 如果 Runner Pod 未完成运行且存在回调函数,则会调用回调函数。 + 如果 Runner Pod 的状态长时间未就绪,则会记录错误状态并抛出异常。 + """ + log.info("wait for case execution to complete") + count = 0 + runner_started = False + while count < POD_MAX_ABNORMAL_STATUS_NUMBER: + with db.connection_context(): + # 检查 Runner Pod 是否正在运行 + if not self.check_runner_pod_running(): + time.sleep(10) + count += 1 + if runner_started: + # 如果测试用例已经开始执行,但当前检测到未运行,则将其状态更新为待定,并重置开始标志 + update_case_record( + self.uuid, + status=db_const.CASE_RECORD_STATUS_PENDING + ) + runner_started = False + continue + + if not 
runner_started: + # 当检测到 Runner Pod 开始运行时,更新用例记录为执行中状态 + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_STARTED + ) + runner_started = True + + # 检查 Runner Pod 是否已完成执行 + if not self.check_runner_pod_completed(): + # 如果 Runner Pod 未完成执行,且存在回调函数,则调用回调函数 + callback = self.signal() + if callback is not None: + callback() + self.callback = None + time.sleep(5) + continue + else: + # 如果 Runner Pod 完成执行,记录相关信息并返回 + log.info( + f"case exec finished, runner_status: {self.redis_db.get_runner_info(uuid=self.uuid)}" + ) + return + + # 如果达到最大异常状态次数,更新用例记录为错误状态,并抛出异常 + with db.connection_context(): + update_case_record( + self.uuid, status=db_const.CASE_RECORD_STATUS_ERROR + ) + log.error("runner pod status not ready") + raise Exception("runner pod status not ready") + + def remove_env(self): + command = f"sudo helm uninstall {self.release_name} -n evaluation" + ssh_client = ssh_pool_default.get(self.local_host_ip) + try: + self.redis_db.delete_runner_info(uuid=self.uuid) + _, _, stderr = ssh_client.exec_command(command) + error = stderr.read().decode() + if error: + log.error(f"uninstall env {self.release_name} error: {error}") + except Exception as e: + log.error(f"remove_env: error: {e}") + raise e + + def cancel(self): + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_STOPPING + ) + self.redis_db.cancel_case(uuid=self.uuid) + log.info("cancel case") + self.wait_case_sync() + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_FINISHED + ) + + def pause(self): + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_PAUSING + ) + self.redis_db.pause_case(uuid=self.uuid) + log.info("pause case") + # TODO:leyi 检查是否完成暂停 + self.wait_case_sync() + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_PAUSED + ) + + def resume(self): + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_STARTING + ) + self.redis_db.resume_case(uuid=self.uuid) + log.info("resume case") + self.wait_case_sync() + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_STARTED + ) + + def force_end(self): + self.redis_db.end_case(uuid=self.uuid) + log.info("force end case") + update_case_record( + uuid=self.uuid, status=db_const.CASE_RECORD_STATUS_FORCE_END + ) + + def timeout(self, timeout: int) -> bool: + if time.time() - self.start_time > timeout: + return True + return False + + def wait_case_sync(self): + while True: + time.sleep(5) + runner_info = self.redis_db.get_runner_info(uuid=self.uuid) + if runner_info["case-control-status"] == runner_info[ + "case-status"] or runner_info[ + "case-status"] == redis_const.CASE_STATUS_COMPLETED: + break + + def create_data_dir(self): + self.runner_report_path = f"{self.runner_data_path}/report" + self.runner_log_path = f"{self.runner_data_path}/log" + self.runner_allure_path = f"{self.runner_data_path}/allure-result" + self.runner_tmp = f"{conf.runner_data_dir}/tmp" + log.info(f"data_dir is : {self.runner_data_path}") + folder_paths = [ + self.runner_report_path, + self.runner_log_path, + self.runner_allure_path, + self.runner_tmp, + ] + for folder_path in folder_paths: + try: + os.makedirs(folder_path) + log.info(f"Runner {self.uuid} create folder: {folder_path}") + except FileExistsError: + pass + + def create_runner_yaml_file(self, file_path): + helm_value_dict = {} + runner_config_dict = {} + runner_config_dict["case_params"] = self.case_params.to_json() + runner_config_dict["redis"] = conf.redis + runner_config_dict["mysql"] = conf.mysql 
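+        # The values file written at the end of this method roughly takes the shape below
+        # (abbreviated; the full key set mirrors eval-controller.yaml):
+        #   runnerConfig:
+        #     case_params: {...}
+        #     redis: {...}
+        #     mysql: {...}
+        #     agent-tools: {...}
+        #     platform-tools: {...}
+        #   image:
+        #     tag: <runner_image_tag>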
+ runner_config_dict["listen_port"] = conf.listen_port + runner_config_dict["global_ssh_port"] = conf.global_ssh_port + runner_config_dict["global_ssh_username"] = conf.global_ssh_username + runner_config_dict["global_ssh_password"] = conf.global_ssh_password + runner_config_dict["fixed_host"] = conf.fixed_host + runner_config_dict["agent-tools"] = conf.agent_tools + runner_config_dict["platform-tools"] = conf.platform_tools + runner_config_dict["runner_data_dir"] = conf.runner_data_dir + helm_value_dict["runnerConfig"] = runner_config_dict + helm_value_dict["image"] = {"tag": self.image_tag} + with open(file_path, 'w') as file: + yaml.dump(helm_value_dict, file) + if not os.path.exists(file_path): + log.error(f"file :{file_path} not found") + + def get_results(self): + self.push_allure_results() + self.get_performance_results() + + def get_performance_results(self): + files_name = os.listdir(self.runner_report_path) + for file_name in files_name: + if file_name.endswith('.yaml'): + break + else: + return + log.info("generate test report") + try: + # TODO: luyao 查看是否收到performance文件,收到则生成报告 + rm = ReportManager( + report_path=self.runner_report_path, report_engines=None + ) + rm.run() + except Exception as e: + log.error(f"get performance results error: {e}") + + def push_allure_results(self): + allure_file_zip = f"{self.runner_allure_path}/allure-report.zip" + if not os.path.exists(allure_file_zip): + return + headers = {"accept": "*/*"} + files = { + 'allureReportArchive': ( + "allure-report.zip", open(allure_file_zip, + 'rb'), 'application/x-zip-compressed' + ) + } + current_timestamp = int(time.time()) + result_url = ALLURE_SERVER + "/api/report/" + self.uuid + "-" + str( + current_timestamp + )[-7:] + try: + resp = requests.post(result_url, files=files, headers=headers) + log.info(resp.text) + if resp.status_code == 201: + return resp.json() + else: + log.error("Unknown Error !!!") + except Exception as e: + log.error(f"upload allure file error: {e}") + return False diff --git a/evaluation/eval-controller/eval-controller/report/base.py b/evaluation/eval-controller/eval-controller/report/base.py new file mode 100644 index 0000000000000000000000000000000000000000..98b245788b1a7fa5c5a4ef1eb20779d1dcde1487 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/report/base.py @@ -0,0 +1,10 @@ +class ReportBase(object): + + def __init__(self, data_path, *args, **kwargs): + self.data_path = data_path + + def load_data(self, data=None): + pass + + def run(self): + pass diff --git a/evaluation/eval-controller/eval-controller/report/markdown.py b/evaluation/eval-controller/eval-controller/report/markdown.py new file mode 100644 index 0000000000000000000000000000000000000000..e02ec5c77c78f9debcd5dc2c0a3447687948a34e --- /dev/null +++ b/evaluation/eval-controller/eval-controller/report/markdown.py @@ -0,0 +1,230 @@ +import os +import re +import time +import datetime +import yaml +from jinja2 import Environment, BaseLoader, Undefined + +from eval_lib.common import logger + +log = logger.get_logger() + +# from mailmerge import MailMerge + +from .base import ReportBase + +# key is case_name pattern, value is TEMPLATE file path and case_group abbreviations +REPORT_TEMPLATE_LIST = { + "performance_analysis_nginx.*": ( + "./report/templates/agent_performance_report_nginx.md", + "performance_analysis_nginx" + ), + "performance_analysis_istio.*": ( + "./report/templates/agent_performance_report_istio.md", + "performance_analysis_istio" + ), +} + + +def get_report_template(case_name): + """ + 
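+    Return the markdown template content whose pattern in REPORT_TEMPLATE_LIST matches case_name; returns None when no pattern matches.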
根据用例名称获取报告模板内容。 + + 参数: + case_name: str - 用例的名称,用于匹配模板。 + + 返回值: + str - 匹配到的模板内容。如果没有匹配到,则返回None。 + """ + content = None + # 遍历报告模板列表,尝试匹配用例名称 + for k, v in REPORT_TEMPLATE_LIST.items(): + # 使用正则匹配当前用例名称 + match = re.match(k, case_name) + if bool(match): + # 如果匹配成功,读取对应的文件内容 + file_path = v[0] + with open(file_path, "r") as f: + content = f.read() + break # 找到匹配项后即停止循环 + return content + + +def get_report_index(case_name): + """ + 根据用例名称获取报告索引 + + 参数: + case_name: str - 用例的名称 + + 返回值: + int - 报告模板的索引,如果没有匹配的模板则返回None + """ + template_index = None # 初始化模板索引为None + + # 遍历报告模板列表,尝试匹配用例名称 + for k, v in REPORT_TEMPLATE_LIST.items(): + match = re.match(k, case_name) # 使用正则匹配用例名称 + if bool(match): # 如果匹配成功 + template_index = v[1] # 设置模板索引 + break # 结束循环 + + return template_index + + +class Dict2Obj: + """ + 将字典转换为对象的类。 + + 参数: + - d (dict): 要转换为对象的字典。字典的键可以是数字或字符串。如果键是数字, + 并且该数字对应于`values`列表的索引位置,则将值添加到该位置; + 如果键是字符串,则将值设置为对象的属性。 + """ + + def __init__(self, d): + self.values = [] + for k, v in d.items(): + # 检查值是否为字典,如果是,则将其转换为Dict2Obj对象 + if isinstance(v, dict): + v = Dict2Obj(v) + # 处理以数字为键的情况,将值添加到正确的索引位置 + if k.isdigit(): + index = int(k) + if index == len(self.values): + self.values.append(v) + elif index > len(self.values): + # 如果索引超出当前`values`长度,先填充空项,再添加值 + self.values += [""] * (index - len(self.values)) + self.values.append(v) + elif index < len(self.values): + # 如果索引在`values`范围内,直接替换该索引位置的值 + self.values[index] = v + else: + # 处理以字符串为键的情况,设置对象的属性 + setattr(self, k, v) + + def __getitem__(self, key): + """ + 根据给定的键获取对应的值。 + + 当键小于存储的值的长度时,返回对应位置的值;否则返回空字符串。 + + 参数: + - key: int,要获取值的索引位置。 + + 返回值: + - 返回索引对应的值或空字符串。 + """ + if key < len(self.values): + return self.values[int(key)] + else: + return "" + + def __getattr__(self, key): + if key.isdigit() and key < len(self.values): + return self.values[int(key)] + else: + return "" + + +class SilentUndefined(Undefined): + + def __str__(self): + return "" + + def __getitem__(self, key): + return self + + def __getattr__(self, key): + return self + + +class ReportMarkdown(ReportBase): + + def __init__(self, data_path): + self.yaml_list = [] + + self.data_path = data_path + self.report_path = f"{data_path}/markdown/" + if os.path.exists(self.report_path): + pass + else: + log.info(f"mkdir {self.report_path}") + os.mkdir(self.report_path) + + + def load_data(self): + """ + 加载数据方法。 + 此方法遍历指定的数据路径(self.data_path),查找并读取所有以“.yaml”结尾的文件。 + 将每个文件的内容安全地加载为YAML格式,并将其添加到一个列表中,供后续使用。 + """ + for file in os.listdir(self.data_path): # 遍历数据路径中的所有文件 + if file.endswith(".yaml"): # 检查文件是否以“.yaml”结尾 + file_path = os.path.join(self.data_path, file) # 拼接文件的完整路径 + with open(file_path, "r") as f: # 打开文件,准备读取 + yaml_data = yaml.safe_load(f) # 安全加载YAML数据 + self.yaml_list.append(yaml_data) # 将加载的数据添加到列表中 + + def merge(self): + """ + 合并多个yaml数据,并根据模板生成报告。 + 该方法首先会将yaml_list中具有相同case_name的项合并,然后根据合并后的数据生成报告。 + 报告将根据模板渲染,并保存到指定的路径下。 + """ + + def write(data): + """ + 根据给定的数据生成报告并写入文件。 + + 参数: + - data: 一个字典,包含生成报告所需的所有数据,必须包含case_name键。 + + 无返回值。 + """ + # 获取报告模板并检查是否存在 + report_template = get_report_template(data["case_name"]) + if report_template is None: + return + # 获取报告索引 + template_index = get_report_index(data["case_name"]) + # 配置并加载模板引擎 + md_template = Environment( + loader=BaseLoader, undefined=SilentUndefined + ).from_string(report_template) + # 将数据转换为Dict2Obj类型,以便在模板中更方便地使用 + data = Dict2Obj(data) + # 根据模板和数据渲染报告 + report = md_template.render(data=data) + # 构造报告文件路径并写入报告内容 + report_path = f"{self.report_path}/agnet-perfromance-report-{template_index}.md" + 
with open(report_path, "w") as f: + f.write(report) + + # 初始化一个空字典用于存储合并后的数据 + data = {} + # 遍历yaml_list,将具有相同case_name的项合并 + for yaml_data in self.yaml_list: + template_index = get_report_index(yaml_data["case_name"]) + if template_index not in data: + data[template_index] = yaml_data + else: + data[template_index].update(yaml_data) + # 对合并后的数据逐个生成报告 + for index, v in data.items(): + write(v) + + def run(self): + try: + flag = 0 + for file in os.listdir(self.data_path): + if file.endswith(".yaml"): + flag = 1 + break + if flag == 0: + return + self.load_data() + self.merge() + except Exception as e: + log.error(e) diff --git a/evaluation/eval-controller/eval-controller/report/report.py b/evaluation/eval-controller/eval-controller/report/report.py new file mode 100644 index 0000000000000000000000000000000000000000..ac68e33117c19a4a756351f1413043a0b31fb024 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/report/report.py @@ -0,0 +1,42 @@ +import os +import glob +import importlib +from eval_lib.common import logger + +log = logger.get_logger() + + +class ReportManager(object): + + def __init__(self, report_path=None, report_engines=None): + self.engines = {} + self.report_path = report_path + self.report_engines = report_engines + + def get_report_engine(self): + plugin_files = glob.glob(os.path.join(f"./report", "*.py")) + log.info(f"plugin_files: {plugin_files}") + for file in plugin_files: + # 获取文件名(不含后缀) + file_name = os.path.basename(file)[:-3] + # 使用importlib.import_module动态导入模块 + module = importlib.import_module( + f"report.{file_name}", package="report" + ) + # 遍历模块中定义的所有属性和方法 + for name in dir(module): + # 如果属性或方法是以Plugin开头的类,则导入该类 + if name.startswith("Report") and name != "ReportBase" and name != "ReportManager": + if self.report_engines: + if name not in self.report_engines: + continue + cls = getattr(module, name) + # 将类添加到当前模块的全局变量中 + self.engines[name] = cls + + def run(self): + self.get_report_engine() + for name, engine in self.engines.items(): + log.info(f"report {name} start!") + engine(data_path=self.report_path).run() + log.info(f"report {name} end!") diff --git a/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_istio.md b/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_istio.md new file mode 100644 index 0000000000000000000000000000000000000000..f2b6f63e3a02331d3ebe1dfcd2348cfa9997278b --- /dev/null +++ b/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_istio.md @@ -0,0 +1,66 @@ +# 采集器性能测试报告(应用性能监控) - {{ data.version }} +## 摘要 +本文档为采集器(deepflow-agent)应用性能监控部分的性能测试报告,测试版本为 {{ data.version }},测试完成时间为{{ data.datetime }}。 + +## 应用性能监控 +本章评估社区版 Agent 在应用性能监控场景下的自身资源消耗,以及对被监控服务的影响。 +测试过程中 Agent 开启了如下功能: +- 应用指标(cBPF + eBPF,1m + 1s) +- 应用调用日志(cBPF + eBPF) +- 系统事件(eBPF File IO) +- 网络指标(cBPF,1m + 1s) +- 网络流日志(cBPF) +未开启如下功能: +- TCP 时序图 +- PCAP 下载 +- 流量分发 +DeepFlow 对通过 cBPF 采集 Packet Data 获取网络和应用的指标和日志,通过 eBPF 采集 Socket Data 获取应用和文件 IO 的指标和日志。关于 eBPF、cBPF 采集位置的示意图如下: + +在 {{ data.version }} 中,DeepFlow Agent 开启的 eBPF Probe 列表可参考DeepFlow GitHub Repo 中的文档。 + +### 测试环境 +- 虚拟机内核:{{ data.vm_kernal }} +- 虚拟机规格:{{ data.vm_cpu }}{{ data.vm_mem }} +- 采集器限制:{{ data.agent_limit_cpu }}{{ data.agent_limit_mem }} +- 采集器commitId:{{ data.commit_id }} + +### 典型云原生微服务场景(istio-bookinfo-demo) +#### 测试方法 +这一节我们希望测试典型云原生微服务业务场景下 deepflow-agent 的性能表现。我们找到了 Istio Bookinfo Demo。Istio 是一种流行的服务网格解决方案,在 GitHub 上拥有 32.9K Star。这个 Demo 的应用拓扑见下图,我们可以看到它由 Python、Java、Ruby、Node.JS 
实现的四个微服务组成,每个微服务所在的 Pod 中运行着 Envoy 代理。这个 Demo 中的一个事务对应着访问 4 个微服务的 4 个调用,由于 Envoy 的存在实际调用链深度会被延长约两倍。 +我们使用 wrk2 来注入稳定的 QPS 负载,wrk2 会直接请求 Productpage 服务。所有的服务(包括 wrk2)部署在一个 8C16GB 的 K8s 节点上(CentOS 7、Kernel 4.19),我们会在该节点上部署 deepflow-agent Daemonset 来对所有调用进行采集,测试过程中限制了 deepflow-agent 资源消耗为 1C768MB。 +为了使得 Bookinfo 能够承受 50+% CPU 的高负载,我们调整了两个瓶颈服务的副本数:将 Productpage 调整为 4 副本、将 Details 调整为 2 副本。 + +```mermaid +graph LR; + Wrk2[Wrk2] --> Ingress[Ingress Envoy]; + Ingress --> Productpage[Productpage\npython]; + Productpage --> Details[Details\nRuby]; + Productpage --> Reviews-v1[Reviews-v1\njava]; + Productpage --> Reviews-v2[Reviews-v2\njava]; + Productpage --> Reviews-v3[Reviews-v3\njava]; + Reviews-v2 --> Ratings[Ratings\nNodejs]; + Reviews-v3 --> Ratings[Ratings\nNodejs]; + +``` + +具体的 wrk2 测试命令: +- wrk2 -c50 -t4 -R$rate -d60 -L http://$productpage_ip:9080/productpage +#### 详细数据 +无采集器运行时的测试数据(基线): +| 期望QPS | 实际QPS | P50 时延 (us) | P90 时延 (us) | productpage CPU | details CPU | reviews CPU | ratings CPU | Envoy CPU | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| {{ data.performance_analysis_istio_without_agent.server.rate[0] }} | {{ data.performance_analysis_istio_without_agent.server.rps[0] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p50[0] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p90[0] }} | {{ data.performance_analysis_istio_without_agent.productpage.max_cpu[0] }} | {{ data.performance_analysis_istio_without_agent.details.max_cpu[0] }} | {{ data.performance_analysis_istio_without_agent.ws_javaagent_jar.max_cpu[0] }} | {{ data.performance_analysis_istio_without_agent.ratings.max_cpu[0] }} | {{ data.performance_analysis_istio_without_agent.envoy.max_cpu[0] }} | +| {{ data.performance_analysis_istio_without_agent.server.rate[1] }} | {{ data.performance_analysis_istio_without_agent.server.rps[1] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p50[1] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p90[1] }} | {{ data.performance_analysis_istio_without_agent.productpage.max_cpu[1] }} | {{ data.performance_analysis_istio_without_agent.details.max_cpu[1] }} | {{ data.performance_analysis_istio_without_agent.ws_javaagent_jar.max_cpu[1] }} | {{ data.performance_analysis_istio_without_agent.ratings.max_cpu[1] }} | {{ data.performance_analysis_istio_without_agent.envoy.max_cpu[1] }} | +| {{ data.performance_analysis_istio_without_agent.server.rate[2] }} | {{ data.performance_analysis_istio_without_agent.server.rps[2] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p50[2] }} | {{ data.performance_analysis_istio_without_agent.server.lantency_p90[2] }} | {{ data.performance_analysis_istio_without_agent.productpage.max_cpu[2] }} | {{ data.performance_analysis_istio_without_agent.details.max_cpu[2] }} | {{ data.performance_analysis_istio_without_agent.ws_javaagent_jar.max_cpu[2] }} | {{ data.performance_analysis_istio_without_agent.ratings.max_cpu[2] }} | {{ data.performance_analysis_istio_without_agent.envoy.max_cpu[2] }} | + + + +有采集器运行时的测试数据: + +| 期望QPS | 实际QPS | P50 时延 (us) | P90 时延 (us) | productpage CPU | details CPU | reviews CPU | ratings CPU | Envoy CPU | Agent CPU | Agent 内存(byte) | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| {{ data.performance_analysis_istio_with_agent.server.rate[0] }} | {{ data.performance_analysis_istio_with_agent.server.rps[0] }} | {{ data.performance_analysis_istio_with_agent.server.lantency_p50[0] }} | {{ 
data.performance_analysis_istio_with_agent.server.lantency_p90[0] }} | {{ data.performance_analysis_istio_with_agent.productpage.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.details.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.ws_javaagent_jar.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.ratings.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.envoy.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.agent.max_cpu[0] }} | {{ data.performance_analysis_istio_with_agent.agent.max_mem[0] }} | +| {{ data.performance_analysis_istio_with_agent.server.rate[1] }} | {{ data.performance_analysis_istio_with_agent.server.rps[1] }} | {{ data.performance_analysis_istio_with_agent.server.lantency_p50[1] }} | {{ data.performance_analysis_istio_with_agent.server.lantency_p90[1] }} | {{ data.performance_analysis_istio_with_agent.productpage.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.details.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.ws_javaagent_jar.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.ratings.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.envoy.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.agent.max_cpu[1] }} | {{ data.performance_analysis_istio_with_agent.agent.max_mem[1] }} | +| {{ data.performance_analysis_istio_with_agent.server.rate[2] }} | {{ data.performance_analysis_istio_with_agent.server.rps[2] }} | {{ data.performance_analysis_istio_with_agent.server.lantency_p50[2] }} | {{ data.performance_analysis_istio_with_agent.server.lantency_p90[2] }} | {{ data.performance_analysis_istio_with_agent.productpage.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.details.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.ws_javaagent_jar.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.ratings.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.envoy.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.agent.max_cpu[2] }} | {{ data.performance_analysis_istio_with_agent.agent.max_mem[2] }} | + + diff --git a/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_nginx.md b/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_nginx.md new file mode 100644 index 0000000000000000000000000000000000000000..23ad96c4bcd227b2b2f04870ba08d70460914029 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/report/templates/agent_performance_report_nginx.md @@ -0,0 +1,56 @@ +# 采集器性能测试报告(应用性能监控) - {{ data.version }} +## 摘要 +本文档为采集器(deepflow-agent)应用性能监控部分的性能测试报告,测试版本为 {{ data.version }},测试完成时间为{{ data.datetime }}。 + +## 应用性能监控 +本章评估社区版 Agent 在应用性能监控场景下的自身资源消耗,以及对被监控服务的影响。 +测试过程中 Agent 开启了如下功能: +- 应用指标(cBPF + eBPF,1m + 1s) +- 应用调用日志(cBPF + eBPF) +- 系统事件(eBPF File IO) +- 网络指标(cBPF,1m + 1s) +- 网络流日志(cBPF) +未开启如下功能: +- TCP 时序图 +- PCAP 下载 +- 流量分发 +DeepFlow 对通过 cBPF 采集 Packet Data 获取网络和应用的指标和日志,通过 eBPF 采集 Socket Data 获取应用和文件 IO 的指标和日志。关于 eBPF、cBPF 采集位置的示意图如下: + +在 {{ data.version }} 中,DeepFlow Agent 开启的 eBPF Probe 列表可参考DeepFlow GitHub Repo 中的文档。 + +### 测试环境 +- 虚拟机内核:{{ data.vm_kernal }} +- 虚拟机规格:{{ data.vm_cpu }}{{ data.vm_mem }} +- 采集器限制:{{ data.agent_limit_cpu }}{{ data.agent_limit_mem }} +- 采集器commitId:{{ data.commit_id }} + +### 极端高性能的业务场景(nginx-default-page) +#### 测试方法 +这一节我们希望测试一个极端高性能(极简业务逻辑、极低资源开销)的业务场景下 deepflow-agent 的性能表现。我们选择了 Nginx,我们知道它以性能强悍著称,它用 C 语言实现,而且我们在此 Demo 中让他只是简单的回复一个默认静态页。我们相信这个 Nginx Demo 的性能表现远超过任何一个实际的生产业务,我们希望使用这个 
Demo 来说明两个问题:1)deepflow-agent 的采集性能如何;deepflow-agent 的采集对极端高性能业务的影响如何。 +我们使用 wrk2 来注入稳定的 QPS 负载,wrk2 会直接请求 Nginx 提供的 Default Page 服务。为了减少其他业务的干扰,我们将 Nginx 和 wrk2 部署在两个单独的虚拟机上(8C16GB、CentOS 7、Kernel 4.19),并且在 Nginx 所在虚拟机上部署了 deepflow-agent。测试过程中限制了 deepflow-agent 资源消耗为 1C768MB。 + +```mermaid +graph LR; + Wrk2[Vm1\nWrk2] --> Nginx[Vm2\nNginx]; +``` + +具体的 wrk2 测试命令: +- wrk2 -c1 -t1 -R$rate -d60 -L http://$nginx_ip:80/index.html +#### 详细数据 +无采集器运行时的测试数据(基线): +| 期望 QPS | 实际 QPS | P50 时延 | P90 时延 | Nginx CPU | +| --- | --- | --- | --- | --- | +| {{ data.performance_analysis_nginx_http_without_agent.server.rate[0] }} | {{ data.performance_analysis_nginx_http_without_agent.server.rps[0] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p50[0] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p90[0] }} | {{ data.performance_analysis_nginx_http_without_agent.nginx.max_cpu[0] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_cpu[0] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_mem[0] }} | +| {{ data.performance_analysis_nginx_http_without_agent.server.rate[1] }} | {{ data.performance_analysis_nginx_http_without_agent.server.rps[1] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p50[1] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p90[1] }} | {{ data.performance_analysis_nginx_http_without_agent.nginx.max_cpu[1] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_cpu[1] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_mem[1] }} | +| {{ data.performance_analysis_nginx_http_without_agent.server.rate[2] }} | {{ data.performance_analysis_nginx_http_without_agent.server.rps[2] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p50[2] }} | {{ data.performance_analysis_nginx_http_without_agent.server.lantency_p90[2] }} | {{ data.performance_analysis_nginx_http_without_agent.nginx.max_cpu[2] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_cpu[2] }} | {{ data.performance_analysis_nginx_http_without_agent.agent.max_mem[2] }} | + + + +有采集器运行时的测试数据: +| 期望 QPS | 实际 QPS | P50 时延 | P90 时延 | Nginx CPU | Agent CPU | Agent 内存 | +| --- | --- | --- | --- | --- | --- | --- | +| {{ data.performance_analysis_nginx_http_with_agent.server.rate[0] }} | {{ data.performance_analysis_nginx_http_with_agent.server.rps[0] }} | {{ data.performance_analysis_nginx_http_with_agent.server.lantency_p50[0] }} | {{ data.performance_analysis_nginx_http_with_agent.server.lantency_p90[0] }} | {{ data.performance_analysis_nginx_http_with_agent.nginx.max_cpu[0] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_cpu[0] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_mem[0] }} | +| {{ data.performance_analysis_nginx_http_with_agent.server.rate[1] }} | {{ data.performance_analysis_nginx_http_with_agent.server.rps[1] }} | {{ data.performance_analysis_nginx_http_with_agent.server.lantency_p50[1] }} | {{ data.performance_analysis_nginx_http_with_agent.server.lantency_p90[1] }} | {{ data.performance_analysis_nginx_http_with_agent.nginx.max_cpu[1] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_cpu[1] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_mem[1] }} | +| {{ data.performance_analysis_nginx_http_with_agent.server.rate[2] }} | {{ data.performance_analysis_nginx_http_with_agent.server.rps[2] }} | {{ 
data.performance_analysis_nginx_http_with_agent.server.lantency_p50[2] }} | {{ data.performance_analysis_nginx_http_with_agent.server.lantency_p90[2] }} | {{ data.performance_analysis_nginx_http_with_agent.nginx.max_cpu[2] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_cpu[2] }} | {{ data.performance_analysis_nginx_http_with_agent.agent.max_mem[2] }} | + + diff --git a/evaluation/eval-controller/eval-controller/report/templates/images/1280X1280.PNG b/evaluation/eval-controller/eval-controller/report/templates/images/1280X1280.PNG new file mode 100644 index 0000000000000000000000000000000000000000..f4760f4ab8b22de9dc27891729bcbf1b89196282 Binary files /dev/null and b/evaluation/eval-controller/eval-controller/report/templates/images/1280X1280.PNG differ diff --git a/evaluation/eval-controller/eval-controller/report/templates/images/1700551489323.jpg b/evaluation/eval-controller/eval-controller/report/templates/images/1700551489323.jpg new file mode 100644 index 0000000000000000000000000000000000000000..630ebcc34dba8b70af749b30621e40dbfc60ab91 Binary files /dev/null and b/evaluation/eval-controller/eval-controller/report/templates/images/1700551489323.jpg differ diff --git a/evaluation/eval-controller/eval-controller/report/templates/images/1700551545881.jpg b/evaluation/eval-controller/eval-controller/report/templates/images/1700551545881.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d89b82c4944f9fda849ddf6a6619084ed721d8d9 Binary files /dev/null and b/evaluation/eval-controller/eval-controller/report/templates/images/1700551545881.jpg differ diff --git a/evaluation/eval-controller/eval-controller/server/auto_test.py b/evaluation/eval-controller/eval-controller/server/auto_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f5d9cb49044da75a613676bc5c8d9bfe4ab0f0d5 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/server/auto_test.py @@ -0,0 +1,52 @@ +from flask import request, Blueprint +from common.model import AutoTestCreate, AutoTestUpdate, AutoTestDelete, AutoTestFilter +from common.utils import json_response, exception_decorate +from common.const import API_PREFIX +from eval_lib.common import logger +from service.auto_test import AutoTest + +auto_test_app = Blueprint('auto_test_app', __name__, url_prefix=API_PREFIX) +log = logger.get_logger() + + +@auto_test_app.route("/auto-test", methods=["POST"]) +@exception_decorate +def exec_tests(): + json_data = request.json + at = AutoTestCreate(json_data) + at.is_valid() + + res, _ = AutoTest(auto_test_app.queue).Post(info=at) + return json_response(data=res), 200 + + +@auto_test_app.route("/auto-test", methods=["PATCH"]) +@exception_decorate +def update_test(): + json_data = request.json + at = AutoTestUpdate(json_data) + at.is_valid() + + res, _ = AutoTest(auto_test_app.queue).Update(info=at) + return json_response(data=res), 200 + + +@auto_test_app.route("/auto-test", methods=["DELETE"]) +@exception_decorate +def delete_tests(): + json_data = request.json + at = AutoTestDelete(json_data) + at.is_valid() + + res, _ = AutoTest(auto_test_app.queue).Delete(info=at) + return json_response(data=res), 200 + + +@auto_test_app.route("/auto-test", methods=["GET"]) +@exception_decorate +def get_tests(): + args = request.args + at = AutoTestFilter(**args) + + res, page = AutoTest(auto_test_app.queue).Get(info=at) + return json_response(data=res, page=page.to_json()), 200 diff --git a/evaluation/eval-controller/eval-controller/server/dictionary.py 
b/evaluation/eval-controller/eval-controller/server/dictionary.py new file mode 100644 index 0000000000000000000000000000000000000000..e5fc832150d731c6d6637fa7602478446db86b8a --- /dev/null +++ b/evaluation/eval-controller/eval-controller/server/dictionary.py @@ -0,0 +1,17 @@ +from flask import Blueprint + +from common.const import API_PREFIX +from common.utils import json_response, exception_decorate +from service.dictonary import DictionaryWorker +from eval_lib.common import logger + +dictionary_app = Blueprint('dictionary_app', __name__, url_prefix=API_PREFIX) +log = logger.get_logger() + + +@dictionary_app.route("/dictionary/", methods=['GET']) +@exception_decorate +def get_resource_dictionary(resource_name): + log.info(f'get_resource_dictionary: {resource_name}') + res = DictionaryWorker(resource_name).Get() + return json_response(data=res), 200 diff --git a/evaluation/eval-controller/eval-controller/server/result.py b/evaluation/eval-controller/eval-controller/server/result.py new file mode 100644 index 0000000000000000000000000000000000000000..c49ccb11f228c1952f5bb516f954c10867df813d --- /dev/null +++ b/evaluation/eval-controller/eval-controller/server/result.py @@ -0,0 +1,68 @@ +from flask import request, Blueprint +from zipfile import ZipFile + +from common.model import ResultPostLog, ResultGetLog, ResultGetFile +from common.utils import json_response, exception_decorate +from common.const import API_PREFIX +from eval_lib.common import logger +from eval_lib.common.exceptions import BadRequestException +from eval_lib.model.const import RESULT_TYPE_LOG_RAW, RESULT_TYPE_PERFORMANCE_MD +from service.result import ResultWorker + +result_app = Blueprint('result_app', __name__, url_prefix=API_PREFIX) +log = logger.get_logger() + + +@result_app.route("/result/zip", methods=["POST"]) +@exception_decorate +def post_result_zip(): + if 'file' not in request.files: + raise BadRequestException("No file part") + + file = request.files['file'] + if file.filename == "": + raise BadRequestException("No selected file") + + zip_file = ZipFile(file) + r = ResultWorker().post_zip(filename=file.filename, zipfile=zip_file) + return json_response(data=r), 200 + + +@result_app.route("/result/log", methods=["POST"]) +@exception_decorate +def post_result_log(): + json_data = request.json + rpl = ResultPostLog(json_data) + rpl.is_valid() + + if rpl.type != RESULT_TYPE_LOG_RAW: + raise BadRequestException("File type is not log") + r = ResultWorker().post_log(rpl) + return json_response(data=r), 200 + + +@result_app.route("/result/log", methods=["GET"]) +@exception_decorate +def get_result_log(): + args = request.args + rgl = ResultGetLog(**args) + rgl.is_valid() + + if rgl.type != RESULT_TYPE_LOG_RAW: + raise BadRequestException("Log File type is not suport") + r = ResultWorker().get_log(rgl) + return json_response(data=r), 200 + + +@result_app.route("/result/performance", methods=["GET"]) +@exception_decorate +def get_result_performance(): + args = request.args + rgl = ResultGetFile(**args) + rgl.is_valid() + + if rgl.type == RESULT_TYPE_PERFORMANCE_MD: + r = ResultWorker().get_performance_md(rgl) + else: + raise BadRequestException("Performance File type is not support") + return json_response(data=r), 200 diff --git a/evaluation/eval-controller/eval-controller/server/server.py b/evaluation/eval-controller/eval-controller/server/server.py new file mode 100644 index 0000000000000000000000000000000000000000..f5ccdc366dbe0dfdb023078f2e38f218d7c24fac --- /dev/null +++ 
b/evaluation/eval-controller/eval-controller/server/server.py @@ -0,0 +1,36 @@ +from flask import Flask +from multiprocessing import Process +from eval_lib.common import logger +from .auto_test import auto_test_app +from .result import result_app +from .dictionary import dictionary_app +from eval_lib.databases.mysql.db import db +from config import conf + +app = Flask(__name__) +log = logger.get_logger() +app.register_blueprint(auto_test_app) +app.register_blueprint(result_app) +app.register_blueprint(dictionary_app) + + +@app.before_request +def _db_connect(): + if db.is_closed(): + db.connect() + + +@app.teardown_request +def _db_close(exc): + if not db.is_closed(): + db.close() + + +class ServerProcess(Process): + + def __init__(self, queue): + auto_test_app.queue = queue + super().__init__() + + def run(self): + app.run(host="0.0.0.0", port=conf.listen_port) diff --git a/evaluation/eval-controller/eval-controller/service/auto_test.py b/evaluation/eval-controller/eval-controller/service/auto_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d971af91f1ff1bf7a9d85e0e23c30ee6c93b1dbe --- /dev/null +++ b/evaluation/eval-controller/eval-controller/service/auto_test.py @@ -0,0 +1,261 @@ +import time +import threading +import datetime + +from common.mysql import update_case_record +from common.utils import Paginator + +from eval_lib.common.exceptions import BadRequestException +from eval_lib.common import logger +from eval_lib.model import const as model_const +from eval_lib.model.base import CaseParams +from eval_lib.databases.mysql.models.models import CaseRecord +from eval_lib.databases.mysql import const as db_const +from config import conf + +from common.model import AutoTestCreate, AutoTestUpdate, AutoTestDelete, AutoTestFilter + +log = logger.get_logger() +POST_TIMEOUT = 10 +# 下发的修改状态请求后,目标预计状态列表。列表中,进行时状态需在完成时状态之前 +CR_STATUS_TARGET_MAP = { + # 暂停请求,预期目标状态为正在暂停和已暂停 + model_const.CASE_PARAMS_STATUS_PAUSE: [ + db_const.CASE_RECORD_STATUS_PAUSING, db_const.CASE_RECORD_STATUS_PAUSED + ], + # 取消请求,预期目标状态为正在停止和结束 + model_const.CASE_PARAMS_STATUS_CANCEL: [ + db_const.CASE_RECORD_STATUS_STOPPING, + db_const.CASE_RECORD_STATUS_FINISHED + ], + # 恢复请求,预期目标状态为正在启动和运行 + model_const.CASE_PARAMS_STATUS_RESUME: [ + db_const.CASE_RECORD_STATUS_STARTING, + db_const.CASE_RECORD_STATUS_STARTED + ], + # 重启请求,TODO + model_const.CASE_PARAMS_STATUS_RESTART: [ + db_const.CASE_RECORD_STATUS_STARTING, + db_const.CASE_RECORD_STATUS_STARTED + ], + model_const.CASE_PARAMS_STATUS_FROCE_END: [ + db_const.CASE_RECORD_STATUS_FORCE_END + ], +} + +# 不同的修改状态请求类型,所支持的当前状态 +PARAMS_STATUS_TARGET_MAP = { + # 暂停请求,仅支持在运行状态时 + model_const.CASE_PARAMS_STATUS_PAUSE: [ + db_const.CASE_RECORD_STATUS_STARTED + ], + # 取消请求,仅支持在运行和暂停状态时 + model_const.CASE_PARAMS_STATUS_CANCEL: [ + db_const.CASE_RECORD_STATUS_STARTED, db_const.CASE_RECORD_STATUS_PAUSED + ], + # 恢复请求,仅支持在暂停状态时 + model_const.CASE_PARAMS_STATUS_RESUME: [ + db_const.CASE_RECORD_STATUS_PAUSED + ], + # 重启请求,TODO + model_const.CASE_PARAMS_STATUS_RESTART: [ + db_const.CASE_RECORD_STATUS_PAUSED + ], + # 强制结束请求,支持所有状态 + model_const.CASE_PARAMS_STATUS_FROCE_END: [ + db_const.CASE_RECORD_STATUS_INIT, db_const.CASE_RECORD_STATUS_STARTED, + db_const.CASE_RECORD_STATUS_PAUSED, + db_const.CASE_RECORD_STATUS_STARTING, + db_const.CASE_RECORD_STATUS_PAUSING, + db_const.CASE_RECORD_STATUS_STOPPING, + db_const.CASE_RECORD_STATUS_ERROR, db_const.CASE_RECORD_STATUS_FINISHED + ] +} + +UPDATE_LOCK = threading.RLock() + + +class AutoTest(object): + + def __init__(self, 
queue) -> None: + self.queue = queue + + def Post(self, info: AutoTestCreate): + """ + 将测试创建消息发布到队列,并等待直到相关测试用例记录创建完成。 + + 参数: + - info: AutoTestCreate 类型,包含测试用例的创建信息。 + + 返回值: + - crs: CaseRecord 查询结果,如果测试用例记录创建成功则返回该记录。 + """ + # 记录日志信息,将消息放入队列 + with UPDATE_LOCK: + crs, _ = self.Get( + AutoTestFilter( + status=[ + db_const.CASE_RECORD_STATUS_INIT, + db_const.CASE_RECORD_STATUS_STARTED, + db_const.CASE_RECORD_STATUS_STARTING, + db_const.CASE_RECORD_STATUS_PENDING, + db_const.CASE_RECORD_STATUS_PAUSED, + db_const.CASE_RECORD_STATUS_PAUSING, + db_const.CASE_RECORD_STATUS_STOPPING + ] + ) + ) + if len(crs) >= conf.max_runner_num: + raise BadRequestException("max_runner_num reached") + for cr in crs: + if cr.case_name == info.case_name: + raise BadRequestException( + "{info.case_name} already in running" + ) + # 创建一个新的测试用例记录,并保存到数据库 + msg = CaseParams(info.to_json()) + cr = CaseRecord( + uuid=msg.uuid, name=f"{msg.case_name}-{msg.uuid[:8]}", + case_name=msg.case_name, process_num=msg.process_num, + agent_type=msg.agent_type, + status=db_const.CASE_RECORD_STATUS_INIT, + created_at=datetime.datetime.now() + + datetime.timedelta(hours=8) + ) + cr.save() + log.info(f"put msg to manager: {msg}") + msg.status = model_const.CASE_PARAMS_STATUS_CREATE + + self.queue.put(msg) + + # 设置等待超时时间 + wait_count = POST_TIMEOUT + at_filter = AutoTestFilter(uuid=msg.uuid) + + # 循环等待,直到测试用例记录被创建或超时 + while wait_count: + crs, _ = self.Get(info=at_filter) + if crs: + return crs, None + time.sleep(1) # 每秒检查一次 + wait_count -= 1 + return self.Get(info=at_filter) + + def Get(self, info: AutoTestFilter = None) -> list: + """ + 根据提供的过滤条件获取测试用例记录列表。 + + 参数: + - info: AutoTestFilter 类型,可选,用于指定获取测试用例的过滤条件。 + + 返回值: + - list: 包含满足过滤条件的测试用例记录的列表。 + """ + crs = [] + + # 默认只选择未被删除的测试用例记录 + not_delted_where_clause = CaseRecord.deleted == db_const.CASE_RECORD_NOT_DELETED + order_by = CaseRecord.created_at.desc() + + if info: + page = Paginator( + CaseRecord.select().where(not_delted_where_clause), + info.page_index, info.page_size + ) + + # 将过滤条件转换为 JSON 格式 + json_where = info.to_json() + # 根据 JSON 格式的过滤条件生成 WHERE 子句 + where_clause = CaseRecord.visible_where_clause(json_where) + # 如果过滤条件中包含删除状态,则加上未被删除的限制 + if not json_where.get("deleted"): + where_clause = (where_clause) & ( + not_delted_where_clause + ) if where_clause else not_delted_where_clause + # 根据 WHERE 子句查询满足条件的测试用例记录 + crs = CaseRecord.select().where(where_clause + ).order_by(order_by).limit( + page.limit + ).offset(page.offset) + # 将查询结果转换为列表并返回 + return [cr for cr in crs], page + else: + # 未提供过滤条件时,只选择未被删除的测试用例记录 + crs = CaseRecord.select().where(not_delted_where_clause + ).order_by(order_by) + return [cr for cr in crs], Paginator() + + def Update(self, info: AutoTestUpdate): + """ + 根据提供的AutoTestUpdate信息更新测试状态或记录。 + + 参数: + - info: AutoTestUpdate对象,包含需要更新的信息,如状态和UUID列表。 + + 返回: + - 返回调用Get方法的结果,该结果基于AutoTestFilter过滤条件获取。 + """ + if info.status is not None: + if info.status not in PARAMS_STATUS_TARGET_MAP.keys(): + raise BadRequestException("status is invalid") + at_filter = AutoTestFilter(uuids=info.uuids) + # 更新锁。每次更新状态时,需要加锁,防止多个线程同时更新状态 + with UPDATE_LOCK: + # 根据info中的状态,获取对应的CR_STATUS_TARGET_MAP列表 + cr_target_status_list = CR_STATUS_TARGET_MAP.get(info.status) + crs, _ = self.Get(info=at_filter) + need_update_crs = [] + for cr in crs: + if cr.status not in cr_target_status_list and cr.status in PARAMS_STATUS_TARGET_MAP.get( + info.status + ): + need_update_crs.append(cr) + if not need_update_crs: + raise BadRequestException( + "no test cases available to modify 
the status" + ) + + # 遍历info中的UUID列表,为每个UUID创建CaseParams消息并放入队列 + for cr in need_update_crs: + uuid = cr.uuid + msg = CaseParams(uuid=uuid, status=info.status) + log.info(f"put msg to manager: {msg}") + self.queue.put(msg) + + # 设置等待超时时间 + wait_count = POST_TIMEOUT + # 循环等待,直到所有相关测试记录的状态都符合预期 + while wait_count: + complete = 1 + crs, _ = self.Get(info=at_filter) + # 检查所有相关记录的状态是否都已更新 + for cr in crs: + if cr.status not in cr_target_status_list: + complete = 0 + if complete: + break + time.sleep(1) + wait_count -= 1 + return self.Get(info=at_filter) + else: + raise BadRequestException("no field can be updated") + # else: + # # 如果info中的状态为None,则从info中构建json_data,并更新CaseRecord表 + # json_data = info.to_json() + # uuids = json_data.pop("uuids") + # CaseRecord.update(**json_data).where(CaseRecord.uuid in uuids + # ).execute() + # 返回根据at_filter过滤得到的结果 + + def Delete(self, info: AutoTestDelete): + with UPDATE_LOCK: + for uuid in info.uuids: + update_case_record( + uuid, status=db_const.CASE_RECORD_STATUS_STOPPING + ) + msg = CaseParams( + uuid=uuid, status=model_const.CASE_PARAMS_STATUS_FROCE_END + ) + self.queue.put(msg) + update_case_record(uuid, deleted=db_const.CASE_RECORD_DELETED) + return [], None diff --git a/evaluation/eval-controller/eval-controller/service/dictonary.py b/evaluation/eval-controller/eval-controller/service/dictonary.py new file mode 100644 index 0000000000000000000000000000000000000000..bcbc992cd5f06a9369a04908e8a3690435394ea1 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/service/dictonary.py @@ -0,0 +1,23 @@ +from eval_lib.source.dictonary import Dictionary +from eval_lib.common import logger +from eval_lib.common.exceptions import BadRequestException + +log = logger.get_logger() + + +class DictionaryWorker(object): + + def __init__(self, resource_name): + self.resource_name = resource_name + self.data = [] + + def Get(self): + dct = Dictionary() + try: + dict_name = f"{self.resource_name.upper()}_DICTIONARY" + raw_data = getattr(dct, dict_name) + self.data = [[key] + values for key, values in raw_data.items()] + except AttributeError as e: + log.error(e) + raise BadRequestException("resource_name is not exist") + return self.data diff --git a/evaluation/eval-controller/eval-controller/service/result.py b/evaluation/eval-controller/eval-controller/service/result.py new file mode 100644 index 0000000000000000000000000000000000000000..2ea869d5cf66874d1240a0bcf253e09df8d29012 --- /dev/null +++ b/evaluation/eval-controller/eval-controller/service/result.py @@ -0,0 +1,131 @@ +import subprocess +import os +import traceback + +from eval_lib.common import logger +from eval_lib.common.exceptions import InternalServerErrorException +from common.model import ResultPostLog, ResultGetLog, ResultLogResponse, ResultGetFile, ResultFileResponse +from config import conf + +log = logger.get_logger() +POST_TIMEOUT = 10 + + +class ResultWorker(object): + + def post_log(self, msg: ResultPostLog): + """ + 将日志消息写入到文件中。 + + 参数: + - msg: ResultPostLog 类型。 + + 返回值: + - 无 + """ + # 根据消息的uuid生成日志文件路径 + log_file = f"{conf.runner_data_dir}/tmp/runner-{msg.uuid}.log" + # log.info(f"get post log msg {msg.uuid}, logfile: {log_file}") + + # 如果消息数据为空,则不进行任何操作 + if not msg.data: + return + try: + # 将日志数据追加写入到文件中 + with open(log_file, 'a+') as file: + file.write(msg.data) + except Exception as e: + # 记录日志写入过程中的错误,并抛出异常 + log.error(f"post log error {e}") + raise e + + def get_log(self, msg: ResultGetLog = None) -> dict: + """ + 获取指定运行器日志的一部分内容。 + + 参数: + - msg: ResultGetLog 类型,包含需要获取日志的 UUID 
和索引信息。 + + 返回值: + - 一个字典,包含日志响应的内容,UUID、日志条目、行数。 + """ + # 构造日志文件路径 + log_file = f"{conf.runner_data_dir}/tmp/runner-{msg.uuid}.log" + if not os.path.exists(log_file): + log_file = f"{conf.runner_data_dir}/runner-{msg.uuid}/log/runner.log" + log.info(f"get log msg {msg}, logfile: {log_file}") + + # 初始化日志响应对象 + rlr = ResultLogResponse(uuid=msg.uuid, logs=[]) + if not os.path.exists(log_file): + return rlr.to_json() + try: + # 处理行大小设置,为0时则获取全部 + line_size = "$" if msg.line_size < 1 else msg.line_size + # 使用sed命令获取指定行范围的日志内容 + sed_cmd = f"sed -n '{msg.line_index},{line_size}p' {log_file}" + p = subprocess.run( + sed_cmd, shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + logs = p.stdout.decode('utf-8') + err = p.stderr.decode('utf-8') + if err: + raise InternalServerErrorException(err) + + # 使用wc命令计算日志文件的行数 + wc_cmd = f"wc -l {log_file}" + p = subprocess.run( + wc_cmd, shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + wc = p.stdout.decode('utf-8') + err = p.stderr.decode('utf-8') + if err: + raise InternalServerErrorException(err) + + # 如果有日志内容,则处理并设置到rlr对象中 + if logs: + logs = logs.split("\n")[:-1] + rlr.line_size = len(logs) + rlr.logs = logs + rlr.line_index = msg.line_index + # 设置日志总行数 + if wc: + rlr.line_count = int(wc.split(" ")[0]) + except Exception as e: + log.error(traceback.format_exc()) + log.error(f"get log error {e}") + raise e + + # 将rlr对象转换为JSON格式返回 + return rlr.to_json() + + def get_performance_results(self): + # TODO: luyao 获取性能测试结果 + pass + + def get_performance_md(self, info: ResultGetFile): + prefix = f"runner-{info.uuid}" + md_dir = f"{conf.runner_data_dir}/{prefix}/report/markdown" + data = [] + if not os.path.exists(md_dir): + return [] + for filename in os.listdir(md_dir): + if not filename.endswith(".md"): + continue + file_path = os.path.join(md_dir, filename) + if os.path.isfile(file_path): + with open(file_path, "r") as f: + data.append([filename, f.read()]) + return data + + def post_zip(self, filename, zipfile): + try: + prefix = filename.split(".zip")[0] + zipfile.extractall(path=f"{conf.runner_data_dir}/") + files = os.listdir(f"{conf.runner_data_dir}/{prefix}") + log.info(files) + except Exception as e: + log.error(f"post zip error {e}") + raise e diff --git a/evaluation/eval-lib/common/exceptions.py b/evaluation/eval-lib/common/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..347221e4f3b03c868109923643e5821cb7ed5f35 --- /dev/null +++ b/evaluation/eval-lib/common/exceptions.py @@ -0,0 +1,23 @@ +class EvaluationException(Exception): + message = None + status = None + + def __init__(self, message, status='FAIL'): + Exception.__init__(self) + self.message = message + self.status = status + + def __str__(self): + return 'Error (%s): %s' % (self.status, self.message) + + +class RunnerCodeNotExist(EvaluationException): + pass + + +class BadRequestException(EvaluationException): + pass + + +class InternalServerErrorException(EvaluationException): + pass diff --git a/evaluation/eval-lib/common/logger.py b/evaluation/eval-lib/common/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..7bc4f7732d98e3ad15dcd6002389b433236141eb --- /dev/null +++ b/evaluation/eval-lib/common/logger.py @@ -0,0 +1,73 @@ +import sys +import os +import datetime, time +import logging +from logging import FileHandler +from logging import StreamHandler + +LOG_LEVEL_MAP = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warn": logging.WARN, + "error": logging.ERROR, +} + +def beijing(sec): + if 
time.strftime('%z') == "+0800": + return datetime.datetime.now().timetuple() + return (datetime.datetime.now() + datetime.timedelta(hours=8)).timetuple() + +class LoggerManager(object): + + LOGGER = logging.getLogger('root') + + def __init__(self, log_level="debug", log_file="/root/log"): + self.log_level = log_level + self.log_file = log_file + self.formatter = logging.Formatter( + '%(asctime)s ' + '%(levelname)s %(module)s.' + '%(funcName)s.%(lineno)s: %(message)s' + ) + self.formatter.converter = beijing + self.init_logger() + + @property + def stdout_handler(self): + stdout_handler = StreamHandler(sys.stdout) + stdout_handler.setFormatter(self.formatter) + return stdout_handler + + @classmethod + def get_logger(cls): + return cls.LOGGER + + def get_child_logger( + self, name='root', log_level='debug', log_file="", propagate=True + ): + logger = logging.getLogger(name) + logger.propagate = propagate + logger.setLevel(LOG_LEVEL_MAP.get(log_level)) + if log_file: + file_handler = FileHandler(log_file) + file_handler.setFormatter(self.formatter) + logger.addHandler(file_handler) + logger.addHandler(self.stdout_handler) + return logger + + def init_logger(self): + if len(self.LOGGER.handlers) > 0: + return + log_dir = os.path.dirname(self.log_file) + if not os.path.exists(log_dir): + os.makedirs(log_dir) + self.LOGGER.setLevel(LOG_LEVEL_MAP.get(self.log_level)) + if self.log_file: + file_handler = FileHandler(self.log_file) + file_handler.setFormatter(self.formatter) + self.LOGGER.addHandler(file_handler) + self.LOGGER.addHandler(self.stdout_handler) + + +def get_logger(): + return LoggerManager.get_logger() diff --git a/evaluation/eval-lib/common/ssh.py b/evaluation/eval-lib/common/ssh.py new file mode 100644 index 0000000000000000000000000000000000000000..de2ffdddca8686fd4b03a930161f4e789ac27c0d --- /dev/null +++ b/evaluation/eval-lib/common/ssh.py @@ -0,0 +1,47 @@ +import paramiko +from typing import Dict + +DEFAULT_TIMEOUT = 6 * 60 + + +class SSHClient(paramiko.SSHClient): + + def exec_command(self, command, timeout=DEFAULT_TIMEOUT): + return super().exec_command(command, timeout=timeout) + + +class SSHPool(object): + pool: Dict[str, paramiko.SSHClient] = {} + + def __init__( + self, default_port=22, default_username=None, default_password=None + ): + self.default_port = default_port + self.default_username = default_username + self.default_password = default_password + + def connect(self, ip, port, username, password): + ssh_client = SSHClient() + ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + try: + ssh_client.connect(ip, port, username, password) + except paramiko.SSHException as e: + return None + return ssh_client + + def get(self, ip, port=None, username=None, password=None) -> SSHClient: + port = port or self.default_port + username = username or self.default_username + password = password or self.default_password + ssh_client = self.pool.get(ip) + if ssh_client and ssh_client.get_transport( + ) and ssh_client.get_transport().is_active(): + return ssh_client + ssh_client = self.connect(ip, port, username, password) + self.pool[ip] = ssh_client + return ssh_client + + def close(self): + for ssh_client in self.pool.values(): + ssh_client.close() + self.pool.clear() diff --git a/evaluation/eval-lib/databases/influx/influx_db.py b/evaluation/eval-lib/databases/influx/influx_db.py new file mode 100644 index 0000000000000000000000000000000000000000..e3b2b239eac337fffc8751fae67f45ee67b6c605 --- /dev/null +++ b/evaluation/eval-lib/databases/influx/influx_db.py @@ -0,0 
+1,33 @@ +from influxdb import InfluxDBClient + +class InfulxDB: + def __init__(self, host="127.0.0.1", port=8086, user="root", password="", database=""): + self.host = host + self.port = port + self.user = user + self.password = password + self.database = database + self.client = InfluxDBClient( + self.host, self.port, self.user, + self.password, self.database, + ) + + def get_procstat_result(self, process_name, start_time, end_time): + '''获取特定进程cpu/mem在一段时间内的90th的使用率 + return {'max_cpu_usage': 10.0, 'max_mem_usage': 10.0} + ''' + #time unit conversion s -> ns + start_time = start_time * 1000000000 + end_time = end_time * 1000000000 + filter = f"pattern = '{process_name}'" + sql = f"SELECT percentile(sum_cpu_usage, 90) AS max_cpu_usage, percentile(sum_memory_usage, 90) AS max_mem_usage \ + FROM (SELECT sum(cpu_usage) as sum_cpu_usage, sum(memory_usage) as sum_memory_usage FROM procstat \ + WHERE {filter} AND time >= {start_time} AND time <= {end_time} GROUP BY time(10s))" + result = self.client.query(sql) + procstat = list(result.get_points()) + if not procstat: + return {'max_cpu_usage': 0.0, 'max_mem_usage': 0.0} + return procstat[0] + + + diff --git a/evaluation/eval-lib/databases/mysql/const.py b/evaluation/eval-lib/databases/mysql/const.py new file mode 100644 index 0000000000000000000000000000000000000000..b85cb757a6730528c2934b95862bd8bfa79b3d13 --- /dev/null +++ b/evaluation/eval-lib/databases/mysql/const.py @@ -0,0 +1,18 @@ +COMPONENT_TYPE_UNKNOWN = 0 # 未知类型 +COMPONENT_TYPE_DF_AGENT = 1 +COMPONENT_TYPE_DF_SERVER = 2 + +CASE_RECORD_STATUS_INIT = 0 +CASE_RECORD_STATUS_STARTED = 1 +CASE_RECORD_STATUS_STARTING = 11 +CASE_RECORD_STATUS_PENDING = 12 +CASE_RECORD_STATUS_PAUSED = 2 +CASE_RECORD_STATUS_PAUSING = 21 +CASE_RECORD_STATUS_FINISHED = 3 +CASE_RECORD_STATUS_STOPPING = 31 +CASE_RECORD_STATUS_ERROR = 4 +CASE_RECORD_STATUS_EXCEPTION = 5 +CASE_RECORD_STATUS_FORCE_END = 6 + +CASE_RECORD_NOT_DELETED = 0 +CASE_RECORD_DELETED = 1 \ No newline at end of file diff --git a/evaluation/eval-lib/databases/mysql/db.py b/evaluation/eval-lib/databases/mysql/db.py new file mode 100644 index 0000000000000000000000000000000000000000..6a5b8915066545abfc0c2af568c9c87028b6273c --- /dev/null +++ b/evaluation/eval-lib/databases/mysql/db.py @@ -0,0 +1,22 @@ +from config import conf +from playhouse.pool import PooledMySQLDatabase + + +def init_mysql_db(config): + db = PooledMySQLDatabase( + database=config.mysql_db, + host=config.mysql_host, + port=config.mysql_port, + user=config.mysql_user, + password=config.mysql_password, + max_connections=100, + charset="utf8", + stale_timeout=30, + connect_timeout=20, + ) + #db.connect() + + return db + + +db = init_mysql_db(conf) \ No newline at end of file diff --git a/evaluation/eval-lib/databases/mysql/models/base.py b/evaluation/eval-lib/databases/mysql/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d2da99f35e8e7d6c373a4d54edcb79442f66083d --- /dev/null +++ b/evaluation/eval-lib/databases/mysql/models/base.py @@ -0,0 +1,83 @@ +from typing import Union +from peewee import PrimaryKeyField +from peewee import Model + +from ..db import db +from ....model.base import BaseStruct + + +class BaseModel(Model): + id = PrimaryKeyField() + + class Meta: + database = db + + def to_json(self): + return { + key.column_name.upper(): self._get_trans_value( + key.column_name, + ) for key in self._meta.sorted_fields + } + + def _get_trans_value(self, key): + return getattr(self, key, None) + + @classmethod + def visible_where_clause(cls, 
filter: Union[dict, BaseStruct], **kwargs): + """ + 根据提供的过滤条件生成对应的可见性 WHERE 子句。 + + 参数: + - cls: 当前类,用于调用类级别的 where_clause 方法。 + - filter: 一个字典或 BaseStruct 实例,包含用于构建 WHERE 子句的过滤条件。 + - **kwargs: 额外的关键字参数,也可用于构建 WHERE 子句。 + + 返回值: + - 返回一个表示 WHERE 条件的表达式,这些条件由 filter 和 kwargs 中的参数生成。 + """ + where = None + # 遍历 filter 参数生成的 WHERE 子句,并合并为一个表达式 + for clause in cls.where_clause(filter): + if where is None: + where = clause + else: + where = (where) & clause + # 遍历 kwargs 参数生成的 WHERE 子句,并合并到之前的表达式中 + for clause in cls.where_clause(kwargs): + if where is None: + where = clause + else: + where = (where) & clause + return where + + @classmethod + def where_clause(cls, filter): + """ + 根据提供的过滤条件生成对应的查询条件。 + + 参数: + - cls: 类对象,用于查找属性与过滤条件匹配。 + - filter: 字典对象,包含需要应用的过滤条件。 + + 返回值: + - 生成器对象,包含构建的查询条件。 + """ + for key in filter.keys(): + # 过滤条件值为空时,跳过 + if filter.get(key) is None: + continue + # 检查过滤条件中的键是否为类属性 + if not hasattr(cls, key): + # 如果键以's'结尾且去掉's'后的键是类属性,则生成包含子查询条件的生成器项 + if key[-1] == "s" and hasattr(cls, key[:-1]): + values = filter.get(key) + if not isinstance(values, list): + values = [values] + yield getattr(cls, key[:-1]).in_(values) + else: + values = filter.get(key) + if isinstance(values, list): + yield getattr(cls, key).in_(values) + else: + # 为类属性生成等于过滤值的查询条件的生成器项 + yield getattr(cls, key) == filter.get(key) diff --git a/evaluation/eval-lib/databases/mysql/models/models.py b/evaluation/eval-lib/databases/mysql/models/models.py new file mode 100644 index 0000000000000000000000000000000000000000..352d8f6a2cac05a74b341ef0914d2f47372f45ee --- /dev/null +++ b/evaluation/eval-lib/databases/mysql/models/models.py @@ -0,0 +1,114 @@ +import datetime +from peewee import CharField, DateTimeField, IntegerField + +from .base import BaseModel +from ..const import COMPONENT_TYPE_UNKNOWN, COMPONENT_TYPE_DF_AGENT, COMPONENT_TYPE_DF_SERVER +from ..db import db +from ....source.dictonary import Dictionary + + +class CaseRecord(BaseModel): + """ + 测试用例记录类 + + 属性: + uuid: 用例唯一标识符,字符串类型,最大长度64,不能为空 + case_name: 用例名称,字符串类型,最大长度64,不能为空 + case_params: 用例参数,字符串类型,最大长度1024,不能为空 + agent_type: 采集器类型,字符串类型,最大长度64,不能为空 + user: 执行用户,字符串类型,可以为空 + runner_commit_id: 执行器提交ID,字符串类型,最大长度64,不能为空 + runner_image_tag: 执行器镜像标签,字符串类型,最大长度64,不能为空 + status: 执行状态,整数类型,不能为空 + deleted: 删除状态,整数类型,不能为空 + created_at: 创建时间,日期时间类型,默认为当前时间 + """ + + uuid = CharField(max_length=64, unique=True, null=False) + name = CharField(max_length=64, null=False) + case_name = CharField(max_length=64, null=False) + case_params = CharField(max_length=1024, null=False) + agent_type = CharField(max_length=64, null=False) + user = CharField(null=True) + runner_commit_id = CharField(max_length=64, null=True) + runner_image_tag = CharField(max_length=64, null=True) + status = IntegerField(null=False) + deleted = IntegerField(null=False, default=0) + created_at = DateTimeField( + formats='%Y-%m-%d %H:%M:%S', + default=datetime.datetime.now() + datetime.timedelta(hours=8) + ) + + class Meta: + table_name = 'case_record' + database = db + + def _get_trans_value(self, key): + value = super()._get_trans_value(key) + if key == "case_name": + if value: + trans_case_name = Dictionary().CASE_DICTIONARY.get(value) + if not trans_case_name: + trans_case_name = Dictionary( + ).CASE_GROUP_DICTIONARY.get(value) + if trans_case_name: + return trans_case_name[1] + elif key == "agent_type": + if value: + trans_value = Dictionary().AGENT_TYPE_DICTIONARY.get(value) + if trans_value: + return trans_value[0] + return value + + +class CaseReport(BaseModel): + # 暂未启用 + """ + 
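# Usage sketch: how a filter dict becomes a WHERE clause. visible_where_clause() builds
# peewee expressions without touching the database: a plural key such as "uuids" is mapped to
# an IN clause on the singular field "uuid", list values become IN clauses, scalars become
# equality tests, None values are skipped, and everything is AND-ed together. The uuid values
# below are placeholders chosen for illustration.
def _where_clause_sketch():
    where = CaseRecord.visible_where_clause({
        "uuids": ["0123456789abcdef", "fedcba9876543210"],  # -> CaseRecord.uuid IN (...)
        "status": 1,                                         # -> CaseRecord.status == 1
        "agent_type": None,                                  # None values are skipped
    })
    return CaseRecord.select().where(where)                  # lazy query, not yet executed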
测试例报表类 + + 属性: + case_uuid: 测试用例唯一标识符,字符串类型,最大长度64,不能为空 + report_path: 报告路径,字符串类型,最大长度64,不能为空 + created_at: 创建时间,日期时间类型,默认为当前时间 + """ + + case_uuid = CharField(max_length=64, null=False) + report_path = CharField(max_length=64, null=False) + created_at = DateTimeField( + formats='%Y-%m-%d %H:%M:%S', default=datetime.datetime.now() + ) + + class Meta: + table_name = 'case_report' + database = db + + +class Component(BaseModel): + # 暂未启用 + """ + 测试组件类 + + 属性: + case_uuid: 关联的测试用例唯一标识符,字符串类型,最大长度64,不能为空 + name: 组件名称,字符串类型,最大长度64,不能为空 + type: 组件类型,整数类型,不能为空 + config: 组件配置,字符串类型,最大长度1024,可以为空 + commit_id: 组件提交ID,字符串类型,可以为空 + image_tag: 组件镜像标签,字符串类型,可以为空 + created_at: 创建时间,日期时间类型,默认为当前时间 + """ + + case_uuid = CharField(max_length=64, null=False) + name = CharField(max_length=64, null=False) + type = IntegerField( + null=False, default=COMPONENT_TYPE_UNKNOWN, choices=[ + COMPONENT_TYPE_UNKNOWN, COMPONENT_TYPE_DF_AGENT, + COMPONENT_TYPE_DF_SERVER + ] + ) + config = CharField(max_length=1024, null=True) + commit_id = CharField(null=True) + image_tag = CharField(null=True) + created_at = DateTimeField( + formats='%Y-%m-%d %H:%M:%S', default=datetime.datetime.now() + ) diff --git a/evaluation/eval-lib/databases/redis/const.py b/evaluation/eval-lib/databases/redis/const.py new file mode 100644 index 0000000000000000000000000000000000000000..01e926a8393222c485fafffb8f82110b1abde5bf --- /dev/null +++ b/evaluation/eval-lib/databases/redis/const.py @@ -0,0 +1,10 @@ +RUNNER_TIMEOUT = 3600 * 24 +RUNNER_KEY = "runner" +GLOBAL_LOCK = "get_runner_info" + +CASE_STATUS_INIT = 'init' +CASE_STATUS_RUNNING = 'running' +CASE_STATUS_COMPLETED = 'completed' +CASE_STATUS_CANCELLED = 'cancelled' +CASE_STATUS_PAUSED = 'paused' +CASE_STATUS_FORCE_END = 'end' \ No newline at end of file diff --git a/evaluation/eval-lib/databases/redis/redis_db.py b/evaluation/eval-lib/databases/redis/redis_db.py new file mode 100644 index 0000000000000000000000000000000000000000..df97a2b78cc8a48bf9218650885d99773be7c64d --- /dev/null +++ b/evaluation/eval-lib/databases/redis/redis_db.py @@ -0,0 +1,66 @@ +import redis +import time +import uuid +from redis.exceptions import WatchError + + +class RedisDB(): + + + def __init__( + self, host, port, + password, db, + max_connections, + ) -> None: + self.host = host + self.port = port + self.password = password + self.db = db + self.conn_pool = redis.ConnectionPool( + host=self.host, port=self.port, max_connections=max_connections, + password=self.password, db=self.db + ) + + def acquire_lock(self, lockname, acquite_timeout=30, time_out=20): + """ + :param lockname: Name of the lock + :param acquire_timeout: Timeout for lock acquisition, default 30 seconds. 
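# Usage sketch: a minimal pattern for guarding a critical section with this lock, assuming a
# local Redis at the placeholder address below. acquire_lock() returns a uuid4 identifier and
# release_lock() deletes the key only while that identifier is still stored, so one client
# cannot release a lock held by another.
def _lock_usage_sketch():
    rdb = RedisDB(host="127.0.0.1", port=6379, password="", db=0, max_connections=10)
    ident = rdb.acquire_lock("get_runner_info")
    try:
        pass  # read-modify-write of shared runner state goes here
    finally:
        rdb.release_lock("get_runner_info", ident)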
+ :param lock_timeout: Lock timeout, default 20 seconds + :return: uuid + """ + identifier = str(uuid.uuid4()) + end = time.time() + acquite_timeout + conn = redis.Redis(connection_pool=self.conn_pool) + while time.time() < end: + if conn.setnx(lockname, identifier): + # Set the expiration time of the key and automatically release the lock when it expires + conn.expire(lockname, time_out) + return identifier + # Resetting the expiration time of a lock when it has not been set + elif conn.ttl(lockname) == -1: + conn.expire(lockname, time_out) + time.sleep(0.001) + return identifier + + def release_lock(self, lockname, identifier): + """ + :param lockname: Name of the lock + :param identifier: Lock Identification + """ + conn = redis.Redis(connection_pool=self.conn_pool) + with conn.pipeline() as pipe: + while True: + try: + # If the key is changed by another client, the transaction throws a WatchError exception. + pipe.watch(lockname) + iden = pipe.get(lockname) + if iden and iden.decode('utf-8') == identifier: + pipe.multi() + pipe.delete(lockname) + pipe.execute() + return True + pipe.unwatch() + break + except WatchError: + pass + return False diff --git a/evaluation/eval-lib/databases/redis/runner_info.py b/evaluation/eval-lib/databases/redis/runner_info.py new file mode 100644 index 0000000000000000000000000000000000000000..2c51bb65ba6b637a5e9ea65ccb6b56b117c9014c --- /dev/null +++ b/evaluation/eval-lib/databases/redis/runner_info.py @@ -0,0 +1,94 @@ +from .redis_db import RedisDB +import redis +from . import const + + + + +class RedisRunnerInfo(RedisDB): + + + def __init__( + self, host, port, + password, db, + max_connections, + ) -> None: + super().__init__( + host, port, + password, db, + max_connections, + ) + + def init_runner_info(self, uuid): + runner_info = { + "uuid": uuid, + "case-control-status": const.CASE_STATUS_RUNNING, + "runner-status": const.CASE_STATUS_INIT, + "case-status": const.CASE_STATUS_INIT, + } + conn = redis.Redis(connection_pool=self.conn_pool) + runner_key_name = f"{const.RUNNER_KEY}-{uuid}" + conn.hmset(runner_key_name, runner_info) + # runner timeout + conn.expire(runner_key_name, const.RUNNER_TIMEOUT) + + def update_runner_info(self, uuid, info: dict): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + # update = False + for k, v in info.items(): + if conn.hget(key_name, k) != v: + conn.hset(key_name, k, v) + # update = True + # if update: + # updated = int(time.time()) + # conn.hset(key_name, "updated_time", updated) + self.release_lock(const.GLOBAL_LOCK, lock) + + def get_runner_info(self, uuid) -> dict: + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + runner_info = {} + conn = redis.Redis(connection_pool=self.conn_pool) + hash_all = conn.hgetall(key_name) + if hash_all: + for k, v in hash_all.items(): + runner_info[k.decode()] = v.decode() + self.release_lock(const.GLOBAL_LOCK, lock) + return runner_info + + def delete_runner_info(self, uuid): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + conn.delete(key_name) + self.release_lock(const.GLOBAL_LOCK, lock) + + def pause_case(self, uuid): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + conn.hset(key_name, "case-control-status", const.CASE_STATUS_PAUSED) + 
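# Usage sketch: the runner-info hash lifecycle as the controller and runner see it, assuming a
# local Redis at the placeholder address below. The controller creates the hash and flips
# "case-control-status"; the runner polls get_runner_info() and reacts to the value.
def _runner_info_sketch(case_uuid="0123456789abcdef"):
    info = RedisRunnerInfo(host="127.0.0.1", port=6379, password="", db=0, max_connections=10)
    info.init_runner_info(case_uuid)                        # runner-<uuid> hash with init statuses
    info.update_runner_info(case_uuid, {"runner-status": const.CASE_STATUS_RUNNING})
    info.pause_case(case_uuid)                              # case-control-status -> paused
    snapshot = info.get_runner_info(case_uuid)              # decoded copy of the whole hash
    info.delete_runner_info(case_uuid)
    return snapshot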
self.release_lock(const.GLOBAL_LOCK, lock) + + def cancel_case(self, uuid): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + conn.hset(key_name, "case-control-status", const.CASE_STATUS_CANCELLED) + self.release_lock(const.GLOBAL_LOCK, lock) + + def resume_case(self, uuid): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + conn.hset(key_name, "case-control-status", const.CASE_STATUS_RUNNING) + self.release_lock(const.GLOBAL_LOCK, lock) + + def end_case(self, uuid): + key_name = f"{const.RUNNER_KEY}-{uuid}" + lock = self.acquire_lock(const.GLOBAL_LOCK) + conn = redis.Redis(connection_pool=self.conn_pool) + conn.hset(key_name, "case-control-status", const.CASE_STATUS_FORCE_END) + self.release_lock(const.GLOBAL_LOCK, lock) \ No newline at end of file diff --git a/evaluation/eval-lib/model/base.py b/evaluation/eval-lib/model/base.py new file mode 100644 index 0000000000000000000000000000000000000000..516ebe4eba0f2776b1e03f0e0c9463f358ff6ba6 --- /dev/null +++ b/evaluation/eval-lib/model/base.py @@ -0,0 +1,60 @@ +from ..common.exceptions import BadRequestException +from . import const + + +class BaseStruct: + + KEYS = [] + + def __init__(self, json_data: dict = None, **kwargs): + if not json_data: + json_data = {} + self.init(**json_data, **kwargs) + + def init(self, **kwargs): + for key in self.KEYS: + if key in kwargs: + setattr(self, key, kwargs.pop(key)) + else: + setattr(self, key, None) + + def __getattr__(self, key): + if key in self.KEYS: + return None + else: + raise AttributeError + + def __str__(self): + return " ".join([f"{key}:{getattr(self, key)}" for key in self.KEYS]) + + def to_json(self): + return {key: getattr(self, key) for key in self.KEYS} + + def keys(self): + yield from self.KEYS + + +class CaseParams(BaseStruct): + + KEYS = [ + "uuid", "case_name", "process_num", "status", "runner_image_tag", + "agent_type" + ] + + def init(self, **kwargs): + self.uuid = kwargs.get("uuid", None) + self.case_name = kwargs.get("case_name", None) + self.process_num = kwargs.get("process_num", 1) + self.status = int( + kwargs.get("status", const.CASE_PARAMS_STATUS_UNKNOWN) + ) + self.runner_image_tag = kwargs.get("runner_image_tag", "latest") + self.agent_type = kwargs.get("agent_type", None) + + def is_valid(self): + # TODO + if not self.uuid: + raise BadRequestException("bad request not uuid") + if self.status not in const.CASE_PARAMS_STATUS_LIST: + raise BadRequestException(f"bad request status {self.status}") + return True diff --git a/evaluation/eval-lib/model/const.py b/evaluation/eval-lib/model/const.py new file mode 100644 index 0000000000000000000000000000000000000000..eb79cce5825d34eea9a66bc733a88b75c726f747 --- /dev/null +++ b/evaluation/eval-lib/model/const.py @@ -0,0 +1,17 @@ +CASE_PARAMS_STATUS_UNKNOWN = 0 +CASE_PARAMS_STATUS_CREATE = 1 +CASE_PARAMS_STATUS_PAUSE = 2 +CASE_PARAMS_STATUS_CANCEL = 3 +CASE_PARAMS_STATUS_RESUME = 4 +CASE_PARAMS_STATUS_RESTART = 5 +CASE_PARAMS_STATUS_FROCE_END = 9 + +CASE_PARAMS_STATUS_LIST = [ + CASE_PARAMS_STATUS_CREATE, CASE_PARAMS_STATUS_PAUSE, + CASE_PARAMS_STATUS_CANCEL, CASE_PARAMS_STATUS_RESTART, + CASE_PARAMS_STATUS_RESUME, CASE_PARAMS_STATUS_FROCE_END +] + +RESULT_TYPE_LOG_RAW = 1 +RESULT_TYPE_PERFORMANCE_MD = 2 +RESULT_TYPE_PERFORMANCE_YAML = 21 diff --git a/evaluation/eval-lib/model/exec_params.py b/evaluation/eval-lib/model/exec_params.py new file mode 
100644 index 0000000000000000000000000000000000000000..b3917f21b81471ec906468e5e49c037d67e12b68 --- /dev/null +++ b/evaluation/eval-lib/model/exec_params.py @@ -0,0 +1,22 @@ +from .base import BaseStruct +from ..common.exceptions import BadRequestException + + +class TestExecParams(BaseStruct): + + KEYS = ["case_name", "keys", "values"] + + def is_valid(self): + # TODO + if not self.case_name: + raise BadRequestException( + "bad request test exec params not case_name" + ) + if not self.keys: + raise BadRequestException("bad request test exec params not keys") + if not self.values: + raise BadRequestException( + "bad request test exec params not values" + ) + + return True diff --git a/evaluation/eval-lib/source/agent.py b/evaluation/eval-lib/source/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..8f930ef1d929abb01d6eecef59433ef1331e6772 --- /dev/null +++ b/evaluation/eval-lib/source/agent.py @@ -0,0 +1,7 @@ +from .base import BaseMeta + + +class CaseDictionary(metaclass=BaseMeta): + + # --------------------------- agent type --------------------------- + AGENT_TYPE_DICTIONARY = {"deepflowce": ["Deepflow-Agent(CE)"]} diff --git a/evaluation/eval-lib/source/base.py b/evaluation/eval-lib/source/base.py new file mode 100644 index 0000000000000000000000000000000000000000..81eaff4e41034ede1c4db164e7ccbf7d3992cea0 --- /dev/null +++ b/evaluation/eval-lib/source/base.py @@ -0,0 +1,10 @@ +# 元类,构造字典映射 +class BaseMeta(type): + + def __new__(cls, name, bases, dct): + mappings = {} + for k, v in dct.items(): + if k.endswith("_DICTONARY"): + mappings[k] = v + dct['__mappings__'] = mappings + return super().__new__(cls, name, bases, dct) diff --git a/evaluation/eval-lib/source/case.py b/evaluation/eval-lib/source/case.py new file mode 100644 index 0000000000000000000000000000000000000000..0705c83e0030c34bbe8a11580bedcf0b07bd3165 --- /dev/null +++ b/evaluation/eval-lib/source/case.py @@ -0,0 +1,66 @@ +from .base import BaseMeta +from ..model import const as model_const +from ..databases.mysql import const as db_const + + +class CaseDictionary(metaclass=BaseMeta): + # ------------------- case name ----------------------------- + CASE_DICTIONARY = { + # [测试例名称,测试例路径,测试例中文名称] + "performance_analysis_nginx_http": [ + "performance_analysis/nginx", "性能分析-极端高性能场景(nginx)" + ], + "performance_analysis_istio": [ + "performance_analysis/istio", "性能分析-典型云原生微服务场景" + ], + #"runner_test": [ + # "runner_test", + # "api测试用例" + #], + } + + CASE_GROUP_DICTIONARY = { + # [测试例组名称, 测试例组路径, 测试例组中文名称] + "performance_analysis": ["performance_analysis", "性能分析"], + } + + # -------------------- case status ------------------------- + CASE_STATUS_SUPPORT_UPDATE_DICTIONARY = { + # [测试例修改状态int,名称,所在哪些状态时支持更新] + model_const.CASE_PARAMS_STATUS_PAUSE: [ + "pause", [db_const.CASE_RECORD_STATUS_STARTED] + ], + model_const.CASE_PARAMS_STATUS_CANCEL: [ + "cancel", + [ + db_const.CASE_RECORD_STATUS_STARTED, + db_const.CASE_RECORD_STATUS_PAUSED + ] + ], + model_const.CASE_PARAMS_STATUS_RESUME: [ + "resume", [db_const.CASE_RECORD_STATUS_PAUSED] + ], + # model_const.CASE_PARAMS_STATUS_FROCE_END: [ + # "force_end", [ + # db_const.CASE_RECORD_STATUS_STARTED, + # db_const.CASE_RECORD_STATUS_STARTING, + # db_const.CASE_RECORD_STATUS_PENDING, + # db_const.CASE_RECORD_STATUS_PAUSED, + # db_const.CASE_RECORD_STATUS_PAUSING, + # db_const.CASE_RECORD_STATUS_FINISHED, + # db_const.CASE_RECORD_STATUS_STOPPING, + # ] + # ], + } + CASE_STATUS_DICTIONARY = { + db_const.CASE_RECORD_STATUS_INIT: ["Init"], + 
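# Usage sketch: building CaseParams from a request payload. Missing keys get defaults
# (process_num=1, runner_image_tag="latest") and is_valid() raises BadRequestException for
# payloads without a uuid or with a status outside CASE_PARAMS_STATUS_LIST. The uuid below is a
# placeholder; the case and agent names come from CASE_DICTIONARY / AGENT_TYPE_DICTIONARY.
from ..model.base import CaseParams

def _case_params_sketch():
    payload = {
        "uuid": "0123456789abcdef",
        "case_name": "performance_analysis_nginx_http",
        "agent_type": "deepflowce",
        "status": model_const.CASE_PARAMS_STATUS_CREATE,
    }
    params = CaseParams(payload)
    params.is_valid()
    return params.to_json()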
db_const.CASE_RECORD_STATUS_STARTING: ["Starting"], + db_const.CASE_RECORD_STATUS_STARTED: ["Running"], + db_const.CASE_RECORD_STATUS_PENDING: ["Pending"], + db_const.CASE_RECORD_STATUS_PAUSED: ["Paused"], + db_const.CASE_RECORD_STATUS_PAUSING: ["Pausing"], + db_const.CASE_RECORD_STATUS_FINISHED: ["Finished"], + db_const.CASE_RECORD_STATUS_STOPPING: ["Stopping"], + db_const.CASE_RECORD_STATUS_ERROR: ["Error"], + db_const.CASE_RECORD_STATUS_EXCEPTION: ["Exception"], + } diff --git a/evaluation/eval-lib/source/dictonary.py b/evaluation/eval-lib/source/dictonary.py new file mode 100644 index 0000000000000000000000000000000000000000..1aae3626618fc018ef9831a52dc30998cdf035d5 --- /dev/null +++ b/evaluation/eval-lib/source/dictonary.py @@ -0,0 +1,39 @@ +import inspect +import importlib +import os + +CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +class DictionaryMeta(type): + + def __new__(cls, name, bases, dct): + all_classes = [] + for file_name in os.listdir(CURRENT_DIR): + if not file_name.endswith('.py') or file_name.startswith('_'): + continue + module_name = file_name[:file_name.rfind('.py')] + module = importlib.import_module(f"eval_lib.source.{module_name}") + + # 使用inspect检查类 + all_classes.extend([ + cls[1] for cls in inspect.getmembers(module, inspect.isclass) + ]) + + # 同步所有Dictionary类中的字典 + mappings = {} + for dct_cls in all_classes: + for attr in dir(dct_cls): + if attr.endswith("_DICTIONARY"): + mappings[attr] = getattr(dct_cls, attr) + dct.update(mappings) + return super().__new__(cls, name, bases, dct) + + +class Dictionary(metaclass=DictionaryMeta): + + @classmethod + def update(cls, key, value): + key = f"{key.upper()}_DICTIONARY" + if hasattr(cls, key): + setattr(cls, key, value) diff --git a/evaluation/eval-lib/source/exec_params.py b/evaluation/eval-lib/source/exec_params.py new file mode 100644 index 0000000000000000000000000000000000000000..fd87fa35848344f6c5e1f40301902f28c8a6d4b8 --- /dev/null +++ b/evaluation/eval-lib/source/exec_params.py @@ -0,0 +1,9 @@ +from .base import BaseMeta + + +class ExecParamsDictionary(metaclass=BaseMeta): + + EXEC_PARAMS_DICTIONARY = { + # [] + "performance_analysis_nginx_http": [] + } diff --git a/evaluation/eval-runner/Dockerfile b/evaluation/eval-runner/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c059cc2d2d093146a86f545057b86e5a55fb107b --- /dev/null +++ b/evaluation/eval-runner/Dockerfile @@ -0,0 +1,51 @@ +# evaluation +# +# VERSION 1.0.0 + +# 构建层 +FROM hub.deepflow.yunshan.net/dev/python:3.8.19-slim-bullseye AS builder +RUN apt-get update && \ + apt-get install --no-install-suggests \ + --no-install-recommends --yes \ + python3-venv=3.9.2-3 \ + build-essential=12.9 \ + gcc=4:10.2.1-1 \ + libpython3-dev=3.9.2-3 \ + libpq-dev=13.14-0+deb11u1 \ + default-libmysqlclient-dev=1.0.7 \ + libmariadb-dev=1:10.5.23-0+deb11u1 \ + libmariadb3=1:10.5.23-0+deb11u1 \ + make=4.3-4.1 \ + && \ + python3 -m venv /root/venv && \ + /root/venv/bin/pip install -U pip==24.0 +# 安装 Base pip 依赖包 +FROM builder AS builder-venv-base +RUN /root/venv/bin/pip install cffi==1.16.0 --trusted-host mirrors.aliyun.com --index-url https://mirrors.aliyun.com/pypi/simple/ +# 安装自定义 pip 依赖包 +FROM builder-venv-base AS builder-venv-custom +COPY requirements.txt /root/requirements.txt +RUN /root/venv/bin/pip install --disable-pip-version-check \ + --no-cache-dir \ + --trusted-host mirrors.aliyun.com \ + --index-url https://mirrors.aliyun.com/pypi/simple/ \ + -r /root/requirements.txt + +# 最终运行层 +FROM 
hub.deepflow.yunshan.net/dev/python:3.8.19-slim-bullseye AS runner +ARG TARGETARCH +RUN --mount=type=bind,target=/temp,from=builder-venv-custom,source=/ \ + BUILD_ARCH=$(echo ${TARGETARCH}|sed 's|amd64|x86_64|'|sed 's|arm64|aarch64|') && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb.* /usr/lib/${BUILD_ARCH}-linux-gnu/ && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb3/ /usr/lib/${BUILD_ARCH}-linux-gnu/libmariadb3/ && \ + cp -raf /temp/usr/lib/${BUILD_ARCH}-linux-gnu/libmysql* /usr/lib/${BUILD_ARCH}-linux-gnu/ && \ + cp -raf /temp/root/venv /root/venv && \ + apt-get update && \ + apt-get install --no-install-suggests \ + --no-install-recommends --yes \ + vim-tiny=2:8.2.2434-3+deb11u1 curl=7.74.0-1.3+deb11u11 +# 复制代码 +COPY ./etc/eval-runner.yaml /etc/ +COPY ./eval-runner/ /root/eval-runner/ +# Run +CMD /root/venv/bin/python3 -u /root/runner/eval-runner.py \ No newline at end of file diff --git a/evaluation/eval-runner/etc/eval-runner.yaml b/evaluation/eval-runner/etc/eval-runner.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f747f6371b876157df33d1b127260bb3e5f5a11 --- /dev/null +++ b/evaluation/eval-runner/etc/eval-runner.yaml @@ -0,0 +1,55 @@ +listen_port: 10083 # HTTP Listen Port +runner_data_dir: "" +global_ssh_port: 22 +global_ssh_username: root +global_ssh_password: "" + +case_params: + uuid: + case_name: + process_num: 1 + +agent-tools: + deepflowce: + name: Deepflow-Agent(CE) + # deploy_type: k8s / workload + deploy_type: k8s + + # deepflow-server 是否添加了云平台 + docking_platform: 0 + cloud_info: + vpc_name: + domain_name: + server_ip: + server_ssh_port: 22 + server_ssh_username: + server_ssh_password: + version: latest + config: + max_cpus: 1 + max_memory: 1024 + +platform-tools: + type: + aliyun: + access_key: + secret_key: + region: + +fixed_host: + performance_analysis_traffic_ip: + performance_analysis_nginx_ip: + performance_analysis_istio_ip: + +mysql: + host: + port: + user: + password: + db: + +redis: + host: + port: + password: + db: \ No newline at end of file diff --git a/evaluation/eval-runner/eval-runner/agent_tools/base.py b/evaluation/eval-runner/eval-runner/agent_tools/base.py new file mode 100644 index 0000000000000000000000000000000000000000..1278832ad1edac2a776579f529f566cb398b7bb6 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/agent_tools/base.py @@ -0,0 +1,115 @@ +from eval_lib.common.ssh import SSHPool +from common.module import AgentMeta +from common.config import conf + + +class Base(object): + + def __init__(self) -> None: + self._ssh_pool = SSHPool() + + def get_ssh_pool(self): + return self._ssh_pool + + def init(self, agent_name: str, meta: AgentMeta): + """初始化采集器参数的函数 + :param meta: 采集器参数的元数据 + :param agent_name: 采集器名称,唯一标识 + :return: + """ + self._ssh_pool.default_port = meta.ssh_port + self._ssh_pool.default_username = meta.ssh_username + self._ssh_pool.default_password = meta.ssh_password + self.agent_ip = meta.agent_ip + self.agent_version = meta.version + self.agent_name = agent_name + self.agent_type = conf.case_params.agent_type + # 配置文件中,agent_tools包含的参数 + self.custom_param: dict = conf.agent_tools[self.agent_type] + + +class AgentBase(Base): + """所有采集器类的基类""" + + def __init__(self) -> None: + super().__init__() + # ----------------以下属性待继承类副职---------------- + + # 1.采集器进程名,用于telegraf获取进程负载数据 + AGENT_PROCESS_NAME = "" + + # ----------------以下方法待继承类实现---------------- + # 发生预期外失败时,直接assert退出 + + # 1. 部署采集器, 包含功能为拉镜像、下载包以及部署等 + def deploy(self): + """部署采集器 + :return: + """ + pass + + # 2. 
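# Usage sketch: a minimal, hypothetical collector tool built on this interface. It assumes a
# fictional "demo-agent" package and systemd unit, and only shows the intended shape of the
# lifecycle hooks; real tools (such as the DeepFlow agent tool below) add version handling,
# configuration and error checks.
class DemoAgentSketch(AgentBase):
    AGENT_PROCESS_NAME = "demo-agent"

    def _run(self, cmd: str) -> str:
        ssh_client = self._ssh_pool.get(self.agent_ip)
        _, stdout, _ = ssh_client.exec_command(cmd)
        return stdout.read().decode()

    def deploy(self):
        # hypothetical download URL, for illustration only
        self._run("curl -sO https://example.com/demo-agent.rpm && sudo rpm -ivh demo-agent.rpm")

    def start(self):
        self._run("sudo systemctl start demo-agent")

    def stop(self):
        self._run("sudo systemctl stop demo-agent")

    def uninstall(self):
        self._run("sudo rpm -e demo-agent")

    def ensure_agent_status_available(self):
        assert "active (running)" in self._run("sudo systemctl status demo-agent")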
启动采集器, 包含功能为启动进程、镜像 + def start(self): + """启动采集器 + :return: + """ + pass + + # 3. 停止采集器,包含功能停止进程、镜像 + def stop(self): + """停止采集器,保留功能,当前直接使用uninstall + :return: + """ + pass + + # 4. 卸载采集器,包含功能为删除服务、镜像 + def uninstall(self): + """卸载采集器 + :return: + """ + pass + + # 5. 重启采集器,包含功能为停止进程、镜像、启动进程、镜像 + def restart(self): + """重启采集器 + :return: + """ + pass + + # 6. 清理采集器及其产生的资源 + def clear(self): + """清理采集器及其产生的资源 + :return: + """ + pass + + # 7. 检查采集器状态,发生无法使用的异常时直接assert + def ensure_agent_status_available(self): + """检查采集器状态保证其正常运行 + :return: + """ + pass + + # 8. 检查采集器在特定时间是否发生异常重启 + def check_abnormal_restart_time(self, start_time, end_time) -> bool: + """检查采集器在特定时间是否发生异常重启 + :return: bool + """ + pass + + # 9. 配置采集器 + def configure_agent(self, config_dict: dict): + """配置采集器,将需要的配置写入,配置文件内容在self.custom_param中, config_dict当前未启用 + :param config_dict: 配置参数 + :return: + """ + pass + + # 10. 获取特定时间采集器数据, 保留函数,未使用 + def get_metric_data_by_agent(self, start_time, end_time) -> dict: + """获取特定时间采集器采集到的数据,例如bps,pps,丢包等等, 保留函数,未使用 + :param start_time: 结束时间 + :param end_time: 开始时间 + :return: 字典形式的数据 + """ + pass diff --git a/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_agent.py b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..60e628e5110ee1ba74b45a4d1574d7ef696b0042 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_agent.py @@ -0,0 +1,435 @@ +import yaml +import os +import time +import copy + +from datetime import datetime, timedelta + +from agent_tools.base import AgentBase +from agent_tools.deepflow_agent import url +from agent_tools.deepflow_agent.deepflow_server import DeepflowServer +from common import utils as common_utils +from eval_lib.common.logger import get_logger + +log = get_logger() + + +class DeeepflowAgent(AgentBase): + AGENT_PROCESS_NAME = "deepflow-agent" + + def __init__(self): + super().__init__() + self.deepflow_server = DeepflowServer() + self.group_name: str = None + self.vtap_lcuuid: str = None + self.group_id: str = None + self.version_commands = { + "v6.2": "--version 6.2.6", + "v6.3": "--version 6.3.9", + "v6.4": "--version 6.4.9", + } + self.deploy_status = False + + def deploy(self): + """部署agent + + """ + # 初始化deepflow_server + server_ip = self.custom_param.get("server_ip", None) + if server_ip is None: + log.error("deepflow_server_ip is None") + assert False + self.group_name = self.agent_name + self.deepflow_server.init( + server_ip, + self.custom_param.get("server_ssh_port", 22), + self.custom_param.get("server_ssh_username"), + self.custom_param.get("server_ssh_password"), + ) + # 部署agent + if self.custom_param["deploy_type"] == "k8s": + self.deploy_k8s_agent() + elif self.custom_param["deploy_type"] == "workload": + self.deploy_workload_agent() + self.deploy_status = True + + def deploy_k8s_agent(self): + """部署k8s类型采集器 + 通过sealos 安装k8s + """ + common_utils.install_k8s( + vm_ip=self.agent_ip, + ssh_pool=self._ssh_pool, + ) + if common_utils.check_helm_chart( + vm_ip=self.agent_ip, + chart_name="deepflow-agent", + namespace="deepflow", + ssh_pool=self._ssh_pool, + ): + log.info( + "helm chart is already installed, re-install deepflow-agent" + ) + self.uninstall() + time.sleep(30) + common_utils.upload_files( + vm_ip=self.agent_ip, + local_path="agent_tools/deepflow_agent/file/deepflow-agent.yaml", + remote_path=".", + ssh_pool=self._ssh_pool, + ) + # self.replace_registry_to_public() + ssh_client = 
self._ssh_pool.get(self.agent_ip) + ssh_client.exec_command( + f"sed -i '2i\ tag: {self.agent_version}' deepflow-agent.yaml" + ) + version = self.version_commands.get(self.agent_version, "") + if self.custom_param.get("docking_platform", ""): + cloud_info = self.custom_param["cloud_info"] + cluster_id = self.deepflow_server.cloud_add_subdomain( + vpc_name=cloud_info["vpc_name"], + domain_name=cloud_info["domain_name"], + subdomain_name=self.agent_name, + ) + cmd_install_agent = f'''sudo helm install deepflow-agent -n deepflow deepflow-agent/deepflow-agent {version} --create-namespace \ + --set deepflowServerNodeIPS={{{self.deepflow_server.server_ip}}} --set deepflowK8sClusterID={cluster_id} -f deepflow-agent.yaml''' + else: + cmd_install_agent = f'''sudo helm install deepflow-agent -n deepflow deepflow-agent/deepflow-agent {version} --create-namespace \ + --set deepflowServerNodeIPS={{{self.deepflow_server.server_ip}}} -f deepflow-agent.yaml''' + cmd_list = [ + "sudo helm repo add deepflow-agent https://deepflow-ce.oss-cn-beijing.aliyuncs.com/chart/stable", + "sudo helm repo update deepflow-agent", cmd_install_agent + ] + cmd = " && ".join(cmd_list) + _, stdout, stderr = ssh_client.exec_command(cmd) + log.info(f"Install agent by k8s cmd: {cmd}") + logs = stdout.readlines() + if logs and 'deepflow-agent Host listening port:' in logs[-1]: + log.info('Deploy the deepflow-agent successfully in kubernetes') + else: + log.error( + f"Deploy the deepflow-agent failed, logs is {stderr.read().decode()}" + ) + assert False + + def deploy_workload_agent(self): + """部署workload类型采集器 + """ + if not self.custom_param.get("docking_platform", ""): + self.platform_enabled() + system_name, system_version = common_utils.get_system_info( + vm_ip=self.agent_ip, + ssh_pool=self._ssh_pool, + ) + if common_utils.check_ssh_command( + self.agent_ip, "deepflow-agent -v", self._ssh_pool + ): + self.uninstall() + common_utils.install_unzip(self.agent_ip, self._ssh_pool) + if 'CentOS' in system_name or 'Alibaba' in system_name: + agent_url = url.deepflow_agent_rpm_lastest_url.replace( + "latest", self.agent_version + ) + install_cmd = f'''curl -O {agent_url} &&\ + sudo unzip deepflow-agent-rpm.zip &&\ + sudo rpm -ivh x86_64/deepflow-agent-1*.rpm &&\ + rm -rf x86_64/ + ''' + elif 'Ubuntu' in system_name and "14." in system_version: + agent_url = url.deepflow_agent_deb_lastest_url.replace( + "latest", self.agent_version + ) + install_cmd = f'''curl -O {agent_url} &&\ + sudo unzip deepflow-agent-deb.zip &&\ + sudo dpkg -i x86_64/deepflow-agent-*.upstart.deb &&\ + rm -rf x86_64/ + ''' + elif 'Ubuntu' in system_name and "14." 
not in system_version: + agent_url = url.deepflow_agent_deb_lastest_url.replace( + "latest", self.agent_version + ) + install_cmd = f'''curl -O {agent_url} &&\ + sudo unzip deepflow-agent-deb.zip &&\ + sudo dpkg -i x86_64/deepflow-agent-*.systemd.deb &&\ + rm -rf x86_64/ + ''' + elif 'Debian' in system_name: + agent_url = url.deepflow_agent_deb_lastest_url.replace( + "latest", self.agent_version + ) + install_cmd = f'''curl -O {agent_url} &&\ + sudo unzip deepflow-agent-deb.zip &&\ + sudo dpkg -i x86_64/deepflow-agent-*.systemd.deb &&\ + rm -rf x86_64/ + ''' + elif 'Anolis' in system_name: + agent_url = url.deepflow_agent_arm_rpm_lastest_url.replace( + "latest", self.agent_version + ) + install_cmd = f'''curl -O {agent_url} &&\ + sudo unzip deepflow-agent*.zip &&\ + sudo rpm -ivh aarch64/deepflow-agent-1*.rpm &&\ + rm -rf aarch64/ + ''' + else: + log.error(f'Unsupported system: {system_name}') + assert False + ssh_client = self._ssh_pool.get(self.agent_ip) + _, stdout, stderr = ssh_client.exec_command(install_cmd) + errs = stderr.read().decode() + exit_status = stdout.channel.recv_exit_status() + if exit_status == 0: + log.info( + f"deepflow-agent is installation successful. please start it" + ) + else: + log.error(f"deepflow-agent is installation failed. err: {errs}") + assert False + server_ip = self.deepflow_server.server_ip + bind_cmd = f"sudo sed -i 's/ - 127.0.0.1/ - {server_ip}/g' /etc/deepflow-agent.yaml" + if self.group_id: + bind_cmd = f"{bind_cmd} &&sudo sed -i '/vtap-group-id-request/s/.*/vtap-group-id-request: {self.group_id}/g' \ + /etc/deepflow-agent.yaml" + + _, stdout, stderr = ssh_client.exec_command(bind_cmd) + exit_status = stdout.channel.recv_exit_status() + if exit_status == 0: + log.info( + f"deepflow-agent bind server successful, server_ip is {server_ip}" + ) + else: + log.error( + f"deepflow-agent bind server failed. 
err: {stderr.read().decode()}" + ) + assert False + + def uninstall(self): + if self.custom_param["deploy_type"] == "k8s": + command = f"sudo helm uninstall deepflow-agent -n deepflow" + ssh_client = self._ssh_pool.get(self.agent_ip) + _, _, stderr = ssh_client.exec_command(command) + error = stderr.read().decode() + if error: + log.error(f"uninstall deepflow-agent error: {error}") + return + system_name, _ = common_utils.get_system_info( + vm_ip=self.agent_ip, + ssh_pool=self._ssh_pool, + ) + uninstall_cmd = "" + if 'CentOS' in system_name or 'Alibaba' in system_name: + uninstall_cmd = "sudo rpm -e deepflow-agent" + elif 'Ubuntu' in system_name: + uninstall_cmd = "sudo dpkg -r deepflow-agent" + elif 'Debian' in system_name: + uninstall_cmd = "sudo dpkg -r deepflow-agent" + elif 'Anolis' in system_name: + uninstall_cmd = "sudo rpm -e deepflow-agent" + else: + log.error(f'Unsupported system: {system_name}') + return + ssh_client = self._ssh_pool.get(self.agent_ip) + _, stdout, stderr = ssh_client.exec_command(uninstall_cmd) + exit_status = stdout.channel.recv_exit_status() + if exit_status == 0: + log.info('deepflow-agent uninstalled successfully on') + else: + log.error( + f'failed to install deepflow-agent on {stderr.read().decode()}' + ) + + def start(self): + if self.custom_param["deploy_type"] == "k8s": + log.info("k8s agent start is not supported") + return + ssh_client = self._ssh_pool.get(self.agent_ip) + check_cmd = 'sudo systemctl start deepflow-agent &&sudo systemctl status deepflow-agent' + _, stdout, stderr = ssh_client.exec_command(check_cmd) + output = stdout.read().decode() + if "Active: active (running)" in output: + log.info("deepflow agent successfully started and is running") + return True + else: + log.error( + f"deepflow-agent start failed, err: {stderr.read().decode()}" + ) + return False + + def stop(self): + if self.custom_param["deploy_type"] == "k8s": + log.info("k8s agent stop is not supported") + return + ssh_client = self._ssh_pool.get(self.agent_ip) + check_cmd = 'sudo systemctl stop deepflow-agent && sudo systemctl status deepflow-agent' + _, stdout, stderr = ssh_client.exec_command(check_cmd) + output = stdout.read().decode() + if "Active: inactive (dead)" in output: + log.info("deepflow agent successfully stopped") + return True + else: + log.error( + f"deepflow-agent stop failed, err: {stderr.read().decode()}" + ) + return False + + def restart(self): + if self.custom_param["deploy_type"] == "k8s": + log.info("k8s agent restart is not supported") + return + ssh_client = self._ssh_pool.get(self.agent_ip) + check_cmd = 'sudo systemctl restart deepflow-agent && sudo systemctl status deepflow-agent' + _, stdout, stderr = ssh_client.exec_command(check_cmd) + output = stdout.read().decode() + if "Active: active (running)" in output: + log.info(f"deepflow agent restarted successfully and is running") + return True + else: + log.error( + f"deepflow-agent restart failed, err: {stderr.read().decode()}" + ) + return False + + def clear(self): + if self.deploy_status: + self.uninstall() + if self.vtap_lcuuid: + self.deepflow_server.delete_vtap_by_lcuuid(self.vtap_lcuuid) + if self.group_id: + self.deepflow_server.delete_group_by_name(self.group_name) + + def ensure_agent_status_available(self): + self.vtap_lcuuid = self.deepflow_server.check_vtaps_list_by_ip( + agent_ip=self.agent_ip + ) + self.deepflow_server.check_analyzer_ip(agent_ip=self.agent_ip) + + def replace_registry_to_public(self): + ssh_client = self._ssh_pool.get(self.agent_ip) + cmd = f'''sed -i 
'/repository:/s/.*/ repository: deepflowce\/deepflow-agent/g' deepflow-agent.yaml''' + _, _, stderr = ssh_client.exec_command(cmd) + err = stderr.readlines() + if err: + log.error(f"Replace Registery Error: {err}") + + def check_abnormal_restart_time(self, start_time, end_time) -> bool: + """ 检查采集器在特定时间是否出现了重启 + :param start_time: 开始时间戳 + :param end_time: 结束时间戳 + :return: True or False, True表示出现了重启,False表示没有出现重启。 + """ + if self.custom_param["deploy_type"] == "k8s": + log.info("k8s agent check abnormal restart time is not supported") + return + ssh_client = self._ssh_pool.get(self.agent_ip) + _, stdout, _ = ssh_client.exec_command( + ''' sudo grep restart /var/log/deepflow-agent/deepflow-agent.log \ + |awk '{log.info substr($1, index($1, "2024")),$2}' ''' + ) + # 这里需要把时间转换为时间戳,然后判断时间戳是否在指定的时间范围内 + for line in stdout: + line = line.strip() + if not line: + continue + time_str = line.split(' ')[0] + # 日志时间是北京时间,需要减去8小时,才能转换为时间戳。 + time_obj = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S.%f' + ) - timedelta(hours=8) + time_stamp = int(time_obj.timestamp()) + if start_time <= time_stamp <= end_time: + log.warning(f"deepflow-agent restarted at {time_str}") + return True + log.info(f"deepflow-agent did not restart") + return False + + def create_config_file_yaml( + self, config_dict: dict, file_path: str + ) -> str: + try: + existing_data = {"vtap_group_id": ""} + if "config" in self.custom_param: + # 读取env.yaml的配置 + existing_data.update(self.custom_param["config"]) + existing_data.update(config_dict) + with open(file_path, 'w') as file: + yaml.dump(existing_data, file) + except Exception as e: + log.error(f"Error: {repr(e)}") + + def apply_config_by_ctl(self, group_name): + server_ip = self.deepflow_server.server_ip + if self.group_id is None: + self.group_id = self.deepflow_server.create_group( + group_name=self.group_name + ) + ssh_client = self.deepflow_server._ssh_pool.get(server_ip) + # 通过deepflow-ctl载入配置 + config_cmd = [ + f"sed -i '/vtap_group_id:/s/.*/vtap_group_id: {self.group_id}/g' {group_name}.yaml", + f"sudo deepflow-ctl agent-group-config create -f {group_name}.yaml", + f"sudo rm -f {group_name}.yaml" + ] + config_cmd = " && ".join(config_cmd) + _, stdout, stderr = ssh_client.exec_command(config_cmd) + exit_status = stdout.channel.recv_exit_status() + if exit_status == 0: + log.info(f"deepflow-agent configure successful") + else: + log.error( + f"deepflow-agent configure failed {stderr.read().decode()}" + ) + assert False + + def update_group_config(self, config_dict: dict = None): + if config_dict is None: + config_dict = {} + else: + config_dict = copy.deepcopy(config_dict) + tmp_file_path = f"{self.group_name}.yaml" + self.create_config_file_yaml( + config_dict=config_dict, file_path=tmp_file_path + ) + common_utils.upload_files( + vm_ip=self.deepflow_server.server_ip, + local_path=tmp_file_path, + remote_path=f'{self.group_name}.yaml', + ssh_pool=self._ssh_pool, + ) + os.remove(tmp_file_path) + self.apply_config_by_ctl(group_name=self.group_name) + + def configure_agent(self, config_dict: dict = None): + """ 配置采集器 + :param config_dict: 自定义配置 + :return: + """ + self.update_group_config(config_dict=config_dict) + self.vtap_lcuuid = self.deepflow_server.check_vtaps_list_by_ip( + agent_ip=self.agent_ip + ) + self.deepflow_server.agent_join_in_group( + vtap_lcuuid=self.vtap_lcuuid, + group_name=self.group_name, + ) + + def platform_enabled(self): + config_dict = {"platform_enabled": 1} + self.custom_param["config"].update(config_dict) + 
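# Usage sketch: roughly what configure_agent() ends up loading on the server. With the defaults
# from etc/eval-runner.yaml (max_cpus: 1, max_memory: 1024) plus a {"platform_enabled": 1}
# override, create_config_file_yaml() writes a file like the one below; apply_config_by_ctl()
# then rewrites the vtap_group_id line to the group's SHORT_UUID and loads it with
# `deepflow-ctl agent-group-config create -f <group_name>.yaml`.
_GROUP_CONFIG_SKETCH = """\
max_cpus: 1
max_memory: 1024
platform_enabled: 1
vtap_group_id: ''
"""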
self.update_group_config(config_dict=config_dict) + + def get_metric_data_by_agent(self, start_time, end_time): + vtap_info = {} + vtap_full_name = self.deepflow_server.get_vtap_full_name_by_ip( + self.agent_ip + ) + max_cpu = self.deepflow_server.get_vtap_max_cpu_usage( + vtap_full_name, start_time, end_time + ) + max_mem = self.deepflow_server.get_vtap_max_mem_usage( + vtap_full_name, start_time, end_time + ) + vtap_info = { + "agent.max_cpu": max_cpu, + "agent.max_mem": max_mem, + } + + return vtap_info diff --git a/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_server.py b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_server.py new file mode 100644 index 0000000000000000000000000000000000000000..d0bfa53e7ece519873bcdfc821834a6258f27084 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/deepflow_server.py @@ -0,0 +1,396 @@ +import time +import requests +import json + +from urllib.parse import urlencode + +from eval_lib.common.logger import get_logger +from eval_lib.common.ssh import SSHPool +from agent_tools.deepflow_agent import url + +log = get_logger() + + +class DeepflowServer(): + + def __init__(self) -> None: + self.server_ip = None + self.control_port = None + self.query_port = None + self._ssh_pool = SSHPool() + + def init( + self, server_ip, ssh_port=22, ssh_username=None, ssh_password=None + ): + """初始化控制器参数的函数 + :param meta: 控制器参数的元数据 + :return: + """ + self._ssh_pool.default_port = ssh_port + self._ssh_pool.default_username = ssh_username + self._ssh_pool.default_password = ssh_password + self.server_ip = server_ip + self.control_port = self.get_control_port() + self.query_port = self.get_query_port() + + def get_control_port(self, server_ip=None, retry_count=100): + server_ip = server_ip if server_ip else self.server_ip + ssh_client = self._ssh_pool.get(server_ip) + for _ in range(retry_count): + try: + log.info("Getting controller port") + _, stdout, stderr = ssh_client.exec_command( + '''sudo kubectl get svc -n deepflow | grep -o "20417:[0-9]*" | cut -d ":" -f 2 + ''', timeout=10 + ) + control_port = stdout.readline().strip() + if control_port: + log.info(f"Control port: {control_port}") + self.control_port = control_port + return control_port + except Exception as e: + log.error( + f"stderr: {stderr.read().decode('utf-8')}, Failed to get port: {e}" + ) + time.sleep(3) + log.error("Failed to get control port after retrying.") + return None + + def get_query_port(self, server_ip=None, retry_count=100): + server_ip = server_ip if server_ip else self.server_ip + ssh_client = self._ssh_pool.get(server_ip) + for _ in range(retry_count): + try: + log.info("Getting querier port") + _, stdout, stderr = ssh_client.exec_command( + '''sudo kubectl get svc -n deepflow | grep -o "20416:[0-9]*" | cut -d ":" -f 2 + ''', timeout=10 + ) + query_port = stdout.readline().strip() + if query_port: + log.info(f"Query port: {query_port}") + self.query_port = query_port + return query_port + except Exception as e: + log.error( + f"stderr: {stderr.read().decode('utf-8')}, Failed to get port: {e}" + ) + time.sleep(3) + log.error("Failed to get query port after retrying.") + return None + + def create_group(self, group_name, vtap_lcuuid=""): + '''Move the agent to the specified group + return: group_id + ''' + headers = {'Content-Type': 'application/json'} + url = "http://{}:{}/v1/vtap-groups/".format( + self.server_ip, self.control_port + ) + data = {"NAME": group_name, "VTAP_LCUUIDS": [vtap_lcuuid]} + log.info(f"Data 
to be sent: {data}") + try: + response = requests.post(url=url, headers=headers, json=data) + response_json: dict = response.json() + log.info("Response JSON: {}".format(response_json)) + agent_group_id = response_json["DATA"]["SHORT_UUID"] + if response_json.get("OPT_STATUS") == "SUCCESS" and agent_group_id: + return agent_group_id + else: + log.error( + f"Failed to add group with existing agent: {response_json}" + ) + assert False + except Exception as e: + log.error(f"Failed to add group with existing agent: {e}") + assert False + + def delete_group_by_name(self, group_name): + group_lcuuid = self.get_group_lcuuid_by_name(group_name) + delete_group_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtap_groups_api_prefix}/{group_lcuuid}/" + res = requests.delete(url=delete_group_url) + if res.status_code == 200: + log.info(f"delete group {group_name} successfully!") + else: + log.error( + f"delete group {group_name} failed! status code is {res.text}" + ) + + def delete_vtap_by_lcuuid(self, vtap_lcuuid): + delete_vtap_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtaps_api_prefix}/{vtap_lcuuid}/" + res = requests.delete(url=delete_vtap_url) + if res.status_code == 200: + log.info(f"delete vtap {vtap_lcuuid} successfully!") + else: + log.error( + f"delete vtap {vtap_lcuuid} failed! status code is {res.text}" + ) + + def get_group_lcuuid_by_name(self, group_name): + get_lcuuid_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtap_groups_api_prefix}" + group_lcuuid = "" + res = requests.get(url=get_lcuuid_url) + if res.status_code == 200: + group_list = res.json()['DATA'] + else: + log.error(f"get group list failed! status code is {res.text}") + for item in group_list: + if item["NAME"] == group_name: + group_lcuuid = item["LCUUID"] + log.info(f"group_lcuuid is {group_lcuuid}") + break + else: + log.error(f"group {group_name} does not exist!") + return group_lcuuid + + def get_group_members_lcuuid(self, group_lcuuid) -> list: + get_lcuuid_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtap_groups_api_prefix}" + f'/{group_lcuuid}/' + res = requests.get(url=get_lcuuid_url) + if res.status_code == 200: + vtaps_lcuuid_list = res.json()['DATA'][0]["VTAP_LCUUIDS"] + log.info(f"vtaps_lcuuid_list : {vtaps_lcuuid_list}") + return vtaps_lcuuid_list + + def agent_join_in_group(self, vtap_lcuuid, group_name): + group_lcuuid = self.get_group_lcuuid_by_name(group_name) + join_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtap_groups_api_prefix}" + f'/{group_lcuuid}/' + vtaps_list = self.get_group_members_lcuuid(group_lcuuid) + if vtap_lcuuid in vtaps_list: + log.info('agent has joined the group') + return + vtaps_list.append(vtap_lcuuid) + log.info(f'vtaps lcuuid of agent group is {vtaps_list}') + headers = {'Content-Type': 'application/json'} + datas = {"NAME": group_name, "VTAP_LCUUIDS": vtaps_list} + datas = json.dumps(datas) + response = requests.request( + "PATCH", join_url, headers=headers, data=datas + ) + res = response.json() + if res["OPT_STATUS"].upper() != "SUCCESS": + log.error(f"agent group {group_name} add vtap failed") + assert False + log.info("add vtap successful") + + def check_vtaps_list_by_ip(self, agent_ip, retry_count=15) -> str: + '''Loop to check if vtaps_list contains vtap by ip of vtap. 
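# Usage sketch: how these helpers are typically chained around a test run, assuming a reachable
# deepflow-server and an agent host; "eval-demo" is a placeholder group name. All URLs are
# built from url.protocol, the server IP, the controller port and the prefixes in url.py.
def _group_flow_sketch(server: "DeepflowServer", agent_ip: str):
    vtap_lcuuid = server.check_vtaps_list_by_ip(agent_ip)   # wait for the vtap to register
    group_id = server.create_group("eval-demo")             # returns the group SHORT_UUID
    server.agent_join_in_group(vtap_lcuuid, "eval-demo")    # no-op if already a member
    server.check_analyzer_ip(agent_ip)                      # wait until an analyzer_ip is assigned
    return group_id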
+        :param retry_count: required, number of checks
+        :param agent_ip: ip of agent
+        :return VTAP_LCUUID
+        '''
+        last_vtaps_list = []
+        for _ in range(retry_count):
+            log.info('Waiting for vtaps synchronization, about 60s')
+            try:
+                check_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtaps_api_prefix}"
+                response = requests.get(url=check_url)
+                last_vtaps_list = response.json().get('DATA', [])
+                for vt in last_vtaps_list:
+                    if vt.get('LAUNCH_SERVER') == agent_ip:
+                        log.info(
+                            f'The vtap was synchronized successfully, the ip is {agent_ip}'
+                        )
+                        return vt['LCUUID']
+            except Exception as err:
+                log.error(
+                    f'Error occurred during vtaps synchronization check: {err}'
+                )
+            time.sleep(60)
+        log.error(f'vtaps synchronization failure, the ip is {agent_ip}')
+        log.error(f'Last vtaps list: {last_vtaps_list}')
+        assert False
+
+    def check_analyzer_ip(self, agent_ip, retry_count=60):
+        ssh_client = self._ssh_pool.get(self.server_ip)
+        check_cmd = "sudo deepflow-ctl agent list|grep %s|awk '{print $5}'|xargs -I {} deepflow-ctl trisolaris.check --cip %s --cmac {} config|grep analyzer_ip|grep %s" % (
+            agent_ip, agent_ip, self.server_ip
+        )
+        for i in range(retry_count):
+            _, stdout, stderr = ssh_client.exec_command(check_cmd)
+            logs = stdout.readlines()
+            errs = stderr.readlines()
+            if len(logs) > 0:
+                log.info(f"successfully assign analyzer_ip, logs is {logs}")
+                break
+            log.info(f"Have been waiting for {10 * i} seconds")
+            if errs:
+                log.info(errs)
+            time.sleep(10)
+        else:
+            log.error("Timeout! agent not assigned analyzer_ip")
+            assert False
+
+    def get_vtap_full_name_by_ip(self, agent_ip):
+        '''Get vtap_full_name by the ip of the vtaps
+        :param agent_ip: required, The ip of vtaps
+        '''
+        try:
+            vtaps_url = f"{url.protocol}{self.server_ip}:{self.control_port}{url.vtaps_api_prefix}"
+            response = requests.get(url=vtaps_url)
+            # Raise an exception on 4xx or 5xx responses
+            response.raise_for_status()
+
+            json_data = response.json().get('DATA', [])
+            for vt in json_data:
+                if vt.get('LAUNCH_SERVER') == agent_ip:
+                    return vt['NAME']
+
+            log.error(
+                f"Failed to get vtap name for IP: {agent_ip}. No matching entry found."
+            )
+            assert False
+        except requests.exceptions.RequestException as e:
+            log.error(f"Error occurred while fetching vtap information: {e}")
+            assert False
+
+    def get_vtap_max_cpu_usage(self, vtap_full_name, start_time, end_time):
+        '''Maximum CPU usage of the agent on DF by API. Parameter description:
+        vtap_full_name; required field, Name of vtaps
+        start_time; required field, Start time for filtering data
+        end_time; required field, End time for filtering data
+        '''
+        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
+        sql = '''select Max(`metrics.cpu_percent`) AS RSS, `tag.host` from \
+        deepflow_agent_monitor where `tag.host` IN ('%s') AND time>=%s \
+        AND time<=%s group by `tag.host` limit 100''' % (
+            vtap_full_name, start_time, end_time
+        )
+        data = {'db': 'deepflow_system', 'sql': sql}
+        data = urlencode(data, encoding='gb2312')
+        response = requests.post(
+            url='http://%s:%s/v1/query/' % (self.server_ip, self.query_port),
+            headers=headers, data=data
+        )
+        # Raise an exception on 4xx or 5xx responses
+        response.raise_for_status()
+        log.info(f"get_vtap_max_cpu_usage:: sql:{sql} res: {response.content}")
+        result = response.json().get('result', {})
+        values = result.get('values', [])
+        if not values:
+            log.error('No data found in the response.')
+            assert False
+        max_cpu = max([float(i[-1]) for i in values])
+        max_cpu_percentage = "{:.2f}%".format(float(max_cpu))
+        return max_cpu_percentage
+
+    def get_vtap_max_mem_usage(self, vtap_full_name, start_time, end_time):
+        '''Maximum memory usage of the agent on DF by API. Parameter description:
+        vtap_full_name; required field, Name of vtaps
+        start_time; required field, Start time for filtering data
+        end_time; required field, End time for filtering data
+        '''
+        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
+        sql = '''select Max(`metrics.memory`) AS RSS, `tag.host` from \
+        deepflow_agent_monitor where `tag.host` IN ('%s') AND time>=%s \
+        AND time<=%s group by `tag.host` limit 100''' % (
+            vtap_full_name, start_time, end_time
+        )
+        data = {'db': 'deepflow_system', 'sql': sql}
+        data = urlencode(data, encoding='gb2312')
+        response = requests.post(
+            url='http://%s:%s/v1/query/' % (self.server_ip, self.query_port),
+            headers=headers, data=data
+        )
+        # Raise an exception on 4xx or 5xx responses
+        response.raise_for_status()
+        log.info(f"get_vtap_max_mem_usage:: sql:{sql} res: {response.content}")
+        result = response.json().get('result', {})
+        values = result.get('values', [])
+        if not values:
+            log.error('No data found in the response.')
+            assert False
+        max_mem = max([float(i[-1]) for i in values])
+        # B -> MB
+        max_mem_mb = "{:.2f}MB".format(float(max_mem) / 1024 / 1024)
+        return max_mem_mb
+
+    def cloud_add_subdomain(self, vpc_name, domain_name, subdomain_name):
+        '''
+        Cloud platform add a subdomain
+        return: cluster_id
+        '''
+        vpc_lcuuid = self.get_vpc_lcuuid_by_name(vpc_name)
+        domain_lcuuid = self.get_domain_lcuuid_by_name(domain_name)
+        cluster_id = self.add_subdomain_agent_sync(
+            vpc_lcuuid, domain_lcuuid, subdomain_name
+        )
+        return cluster_id
+
+    def get_vpc_lcuuid_by_name(self, vpc_name, retries=50):
+        vpc_info_list = []
+        for _ in range(retries):
+            vpc_info_list = []
+            try:
+                vpc_url_for_get = f"http://{self.server_ip}:{self.control_port}{url.v2_vpcs_api_prefix}"
+                res = requests.get(url=vpc_url_for_get)
+                if res.status_code == 200:
+                    log.info('get vpc list successfully')
+                    vpc_info_list = res.json()['DATA']
+                else:
+                    log.error(f"get vpc list failed, res: {res.content}")
+            except Exception as e:
+                log.error(f"Failed to get vpc info list: {e}")
+            for vpc_info in vpc_info_list:
+                if vpc_name in vpc_info['NAME']:
+                    vpc_lcuuid = vpc_info['LCUUID']
+                    log.info(f'get vpc lcuuid by name, lcuuid:{vpc_lcuuid}')
+                    return vpc_lcuuid
+            log.info('vpc info is being synchronized, wait 10s')
+            time.sleep(10)
+        log.error(
+            f"get vpc lcuuid by name error:: vpc_info_list:{vpc_info_list}, vpc_name: {vpc_name}"
+        )
+ return None + + def get_domain_lcuuid_by_name(self, domain_name, retries=50): + domain_info_list = [] + for _ in range(retries): + domain_info_list = [] + try: + domain_url_for_get = f"http://{self.server_ip}:{self.control_port}{url.v2_domains_api_prefix}" + res = requests.get(url=domain_url_for_get) + if res.status_code == 200: + log.info('get domain list successfully') + domain_info_list = res.json()['DATA'] + else: + log.error(f"get domain list failed, res: {res.content}") + except Exception as e: + log.error(f"Failed to get domain info list: {e}") + for domain_info in domain_info_list: + if domain_name in domain_info['NAME']: + domain_lcuuid = domain_info['LCUUID'] + log.info( + f'get domain lcuuid by name, lcuuid:{domain_lcuuid}' + ) + return domain_lcuuid + log.info('domain info is being synchronized, wait 10s') + time.sleep(10) + log.error( + f"get domain lcuuid by name error:: domain_info_list:{domain_info_list}, domain_name: {domain_name}" + ) + return None + + def add_subdomain_agent_sync( + self, vpc_lcuuid, domain_lcuuid, subdomain_name + ): + subdomain_url = f"http://{self.server_ip}:{self.control_port}{url.v2_subdomains_api_prefix}" + data = { + "NAME": "{}".format(subdomain_name), + "CONFIG": { + "vpc_uuid": "{}".format(vpc_lcuuid), + "pod_net_ipv4_cidr_max_mask": 16, + "pod_net_ipv6_cidr_max_mask": 64, + "port_name_regex": "^(cni|flannel|cali|vxlan.calico|tunl|en[ospx]|eth)" + }, + "DOMAIN": "{}".format(domain_lcuuid) + } + data = json.dumps(data) + header = {'content-type': 'application/json'} + res = requests.post(url=subdomain_url, data=data, headers=header) + log.info(f"add subdomain, res: {res.content}") + if res.status_code == 200: + subdomain_info = res.json()['DATA'] + cluster_id = subdomain_info['CLUSTER_ID'] + log.info( + f'add subdomain successfully, vpc_lcuuid:{vpc_lcuuid}, domain_lcuuid:{vpc_lcuuid}, cluster_id:{cluster_id}' + ) + return cluster_id + else: + log.error('add subdomain failed') + assert False diff --git a/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/file/deepflow-agent.yaml b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/file/deepflow-agent.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c5b194aa57022a52a97f8b4fb4a37d6e3ec908c --- /dev/null +++ b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/file/deepflow-agent.yaml @@ -0,0 +1,2 @@ +image: + repository: deepflowce/deepflow-agent diff --git a/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/url.py b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/url.py new file mode 100644 index 0000000000000000000000000000000000000000..71a1c401d4551ceb257eadcdbc4cdf9c19f73ad2 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/agent_tools/deepflow_agent/url.py @@ -0,0 +1,11 @@ +deepflow_agent_rpm_lastest_url = 'https://deepflow-ce.oss-cn-beijing.aliyuncs.com/rpm/agent/latest/linux/amd64/deepflow-agent-rpm.zip' +deepflow_agent_deb_lastest_url = 'https://deepflow-ce.oss-cn-beijing.aliyuncs.com/deb/agent/latest/linux/amd64/deepflow-agent-deb.zip' +deepflow_agent_arm_rpm_lastest_url = f"https://deepflow-ce.oss-cn-beijing.aliyuncs.com/rpm/agent/latest/linux/arm64/deepflow-agent-rpm.zip" +# ------ DeepFlow_CE_API_PREFIX -------- +protocol = 'http://' +v1_domains_api_prefix = '/v1/domains' +v2_domains_api_prefix = '/v2/domains' +v2_vpcs_api_prefix = '/v2/vpcs' +v2_subdomains_api_prefix = '/v2/sub-domains/' +vtaps_api_prefix = '/v1/vtaps' +vtap_groups_api_prefix = '/v1/vtap-groups' diff --git 
a/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_with_agent.py b/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_with_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..ba4d490b747677085c1c7abd0105c565c72d8567 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_with_agent.py @@ -0,0 +1,166 @@ +import pytest +import allure,time +from common.utils import step as allure_step +from common.utils import choose_platform +from common.utils import choose_agent +from eval_lib.common.logger import get_logger +from common.results import AgentResults +from agent_tools.base import AgentBase +from common import utils as common_utils +from case.performance_analysis import utils as performance_analysis_utils +from platform_tools.aliyun import ali_const +from common.utils import ssh_pool_default + +log = get_logger() + +case_name = "performance_analysis_istio_with_agent" +case_info = {} +tool_params = ["260", "220", "180"] +server_process_names = [ + "envoy", "ratings", "ws-javaagent.jar", "details", "productpage" +] +def create_http_traffic_action( + istio_ip, traffic_ip, productpage_port ,param +): + ssh = ssh_pool_default.get(traffic_ip) + start_time = int(time.time()) + log.info("start generating http traffic") + _, _, stderr = ssh.exec_command( + f'''wrk2 -c50 -t4 -R {param} -d 100 -L http://{istio_ip}:{productpage_port}/productpage | grep -E "(Latency Distribution|Requests/sec)" -A 8 | grep -E "^( 50.000| 90.000|Requests/sec:)"| awk '{{print $2}}' > traffic_result.log''' + ) + err = stderr.readlines() + if err: + log.error(f"wrk2 err, log:{err}") + log.info("complete http traffic generation") + end_time = int(time.time()) + return start_time, end_time + +class TestPerformanceAnalysisIstioWithAgent(): + + @classmethod + def setup_class(cls): + uuid = common_utils.get_case_uuid() + cls.instance_name_agent = f"{case_name}-agent-{uuid}" + cls.instance_name_traffic = f"{case_name}-traffic-{uuid}" + cls.agent_name = f"{case_name}_{uuid}" + cls.result = AgentResults(case_name=case_name) + cls.result.add_case_info(info=case_info) + cls.agent:AgentBase = None + + @classmethod + def teardown_class(cls): + cls.result.generate_yaml_file() + cls.agent.clear() + + + @allure.suite('performance analysis') + @allure.epic('Agent performance analysis') + @allure.feature('') + @allure.title('Agent性能分析 - istio') + @allure.description('Test the performance of the agent on istio') + @pytest.mark.medium + def test_performance_analysis_istio_with_agent(self): + with allure_step('step 1: create instance'): + Platform = choose_platform() + if Platform: + instance_info = Platform.create_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic], + image_id=ali_const.ali_image_id_performance_analysis, + ) + else: + log.info("no platform, use default ip") + instance_info = { + self.instance_name_agent: common_utils.get_fixed_host_ip(self.instance_name_agent), + self.instance_name_traffic: common_utils.get_fixed_host_ip(self.instance_name_traffic) + } + + agent_ip = instance_info[self.instance_name_agent] + traffic_ip = instance_info[self.instance_name_traffic] + with allure_step('step 2: install agent'): + Agent = choose_agent() + TestPerformanceAnalysisIstioWithAgent.agent = Agent() + agent_meta = common_utils.get_meta_data(agent_ip) + TestPerformanceAnalysisIstioWithAgent.agent.init( + agent_name=self.agent_name, + meta=agent_meta + ) + self.agent.deploy() + + with 
allure_step('step 3: sync agent'):
+            self.agent.start()
+            self.agent.ensure_agent_status_available()
+            self.agent.configure_agent()
+            log.info("wait 120s")
+            time.sleep(120)
+
+        with allure_step('step 4: update telegraf config'):
+            common_utils.upload_files(
+                vm_ip=agent_ip,
+                local_path="case/performance_analysis/tools/telegraf.conf",
+                remote_path="telegraf.conf",
+                ssh_pool=self.agent.get_ssh_pool(),
+            )
+            performance_analysis_utils.reload_telegraf_conf(
+                vm_ip=agent_ip,
+                ssh_pool=self.agent.get_ssh_pool()
+            )
+        with allure_step("step 5: install istio"):
+            if Platform:
+                performance_analysis_utils.install_istio(
+                    vm_ip=agent_ip,
+                    ssh_pool=self.agent.get_ssh_pool(),
+                )
+                performance_analysis_utils.init_istio(
+                    vm_ip=agent_ip,
+                    ssh_pool=self.agent.get_ssh_pool(),
+                )
+            else:
+                log.info("no platform, no install istio")
+
+        productpage_port = performance_analysis_utils.get_istio_productpage_server_port(
+            vm_ip=agent_ip,
+            ssh_pool=self.agent.get_ssh_pool(),
+        )
+        server_process_names.append(self.agent.AGENT_PROCESS_NAME)
+        for i in range(len(tool_params)):
+            param = tool_params[i]
+            result_data = {}
+            with allure_step(f'step 6.{i}: start wrk2 traffic tool, rate {param}'):
+                result_data["case.command"] = f"wrk2 -c50 -t4 -R {param} -d 100 -L http://{agent_ip}:{productpage_port}/productpage"
+                result_data["server.rate"] = param
+                start_time, end_time = create_http_traffic_action(
+                    istio_ip=agent_ip,
+                    traffic_ip=traffic_ip,
+                    productpage_port=productpage_port,
+                    param=param,
+                )
+                time.sleep(30)
+                # Data reported by the traffic generator (wrk2)
+                wrk2_result_data = performance_analysis_utils.get_traffic_tool_data(
+                    vm_ip=traffic_ip,
+                )
+                result_data.update(wrk2_result_data)
+                log.info(wrk2_result_data)
+                # Process resource usage collected by telegraf
+                telegraf_result_data = performance_analysis_utils.get_process_usage_by_telegraf(
+                    vm_ip=agent_ip,
+                    process_name_list=server_process_names,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+                result_data.update(telegraf_result_data)
+                log.info(telegraf_result_data)
+                # agent_result_data = agent.get_metric_data_by_agent(start_time=start_time, end_time=end_time)
+                # self.result.add_result_data(data=agent_result_data)
+                # log.info(agent_result_data)
+                self.result.add_result_data(data=result_data, index=i)
+        with allure_step('step n: delete instance and clear'):
+            if Platform:
+                Platform.delete_instances(
+                    instance_names=[self.instance_name_agent,self.instance_name_traffic]
+                )
+            else:
+                log.info("no platform, use default ip, no delete")
+
+
diff --git a/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_without_agent.py b/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_without_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc90a3cd3f9f05baad281a6ed6e24cd9a3c025e3
--- /dev/null
+++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/istio/test_istio_without_agent.py
@@ -0,0 +1,140 @@
+import pytest
+import allure,time
+from common.utils import step as allure_step
+from common.utils import choose_platform
+from eval_lib.common.logger import get_logger
+from common.results import AgentResults
+from common import utils as common_utils
+from case.performance_analysis import utils as performance_analysis_utils
+from platform_tools.aliyun import ali_const
+from common.utils import ssh_pool_default
+
+log = get_logger()
+
+case_name = "performance_analysis_istio_without_agent"
+case_info = {}
+tool_params = ["260", "220", "180"]
+server_process_names = [
+    "envoy", "ratings", "ws-javaagent.jar", "details",
+    "productpage"
+]
+def create_http_traffic_action(
+    istio_ip, traffic_ip, productpage_port, param
+):
+    ssh = ssh_pool_default.get(traffic_ip)
+    start_time = int(time.time())
+    log.info("start generating http traffic")
+    _, _, stderr = ssh.exec_command(
+        f'''wrk2 -c50 -t4 -R {param} -d 100 -L http://{istio_ip}:{productpage_port}/productpage | grep -E "(Latency Distribution|Requests/sec)" -A 8 | grep -E "^( 50.000| 90.000|Requests/sec:)"| awk '{{print $2}}' > traffic_result.log'''
+    )
+    err = stderr.readlines()
+    if err:
+        log.error(f"wrk2 err, log:{err}")
+    log.info("complete http traffic generation")
+    end_time = int(time.time())
+    return start_time, end_time
+
+class TestPerformanceAnalysisIstioWithoutAgent():
+
+    @classmethod
+    def setup_class(cls):
+        uuid = common_utils.get_case_uuid()
+        cls.instance_name_agent = f"{case_name}-agent-{uuid}"
+        cls.instance_name_traffic = f"{case_name}-traffic-{uuid}"
+        cls.result = AgentResults(case_name=case_name)
+        cls.result.add_case_info(info=case_info)
+
+    @classmethod
+    def teardown_class(cls):
+        cls.result.generate_yaml_file()
+
+    @allure.suite('performance analysis')
+    @allure.epic('Agent performance analysis')
+    @allure.feature('')
+    @allure.title('Agent performance analysis without agent - istio')
+    @allure.description('Baseline test of istio performance without the agent deployed')
+    @pytest.mark.medium
+    def test_performance_analysis_istio_without_agent(self):
+        with allure_step('step 1: create instance'):
+            Platform = choose_platform()
+            if Platform:
+                instance_info = Platform.create_instances(
+                    instance_names=[self.instance_name_agent,self.instance_name_traffic],
+                    image_id=ali_const.ali_image_id_performance_analysis,
+                )
+            else:
+                log.info("no platform, use default ip")
+                instance_info = {
+                    self.instance_name_agent: common_utils.get_fixed_host_ip(self.instance_name_agent),
+                    self.instance_name_traffic: common_utils.get_fixed_host_ip(self.instance_name_traffic)
+                }
+            agent_ip = instance_info[self.instance_name_agent]
+            traffic_ip = instance_info[self.instance_name_traffic]
+
+        with allure_step('step 2: update telegraf config'):
+            common_utils.upload_files(
+                vm_ip=agent_ip,
+                local_path="case/performance_analysis/tools/telegraf.conf",
+                remote_path="telegraf.conf",
+                ssh_pool=ssh_pool_default,
+            )
+            performance_analysis_utils.reload_telegraf_conf(
+                vm_ip=agent_ip,
+                ssh_pool=ssh_pool_default
+            )
+        with allure_step("step 3: install istio"):
+            if Platform:
+                performance_analysis_utils.install_istio(
+                    vm_ip=agent_ip,
+                    ssh_pool=ssh_pool_default,
+                )
+                performance_analysis_utils.init_istio(
+                    vm_ip=agent_ip,
+                    ssh_pool=ssh_pool_default,
+                )
+            else:
+                log.info("no platform, no install istio")
+        productpage_port = performance_analysis_utils.get_istio_productpage_server_port(
+            vm_ip=agent_ip,
+            ssh_pool=ssh_pool_default,
+        )
+        for i in range(len(tool_params)):
+            param = tool_params[i]
+            result_data = {}
+            with allure_step(f'step 4.{i}: start wrk2 traffic tool, rate {param}'):
+                result_data["case.command"] = f"wrk2 -c50 -t4 -R {param} -d 100 -L http://{agent_ip}:{productpage_port}/productpage"
+                result_data["server.rate"] = param
+                start_time, end_time = create_http_traffic_action(
+                    istio_ip=agent_ip,
+                    traffic_ip=traffic_ip,
+                    productpage_port=productpage_port,
+                    param=param,
+                )
+                time.sleep(30)
+                # Data reported by the traffic generator (wrk2)
+                wrk2_result_data = performance_analysis_utils.get_traffic_tool_data(
+                    vm_ip=traffic_ip,
+                )
+                result_data.update(wrk2_result_data)
+                log.info(wrk2_result_data)
+                # Process resource usage collected by telegraf
+                telegraf_result_data = performance_analysis_utils.get_process_usage_by_telegraf(
+                    vm_ip=agent_ip,
process_name_list=server_process_names, + start_time=start_time, + end_time=end_time, + ) + result_data.update(telegraf_result_data) + log.info(telegraf_result_data) + self.result.add_result_data(data=result_data, index=i) + with allure_step('step n: delete instance and clear'): + if Platform: + Platform.delete_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic] + ) + else: + log.info("no platform, use default ip, no delete") + + + diff --git a/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_with_agent.py b/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_with_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..fa49e6889a87c590f44160c7c7c02712c9a516fd --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_with_agent.py @@ -0,0 +1,151 @@ +import pytest +import allure,time +from common.utils import step as allure_step +from common.utils import choose_platform +from common.utils import choose_agent +from eval_lib.common.logger import get_logger +from common.results import AgentResults +from agent_tools.base import AgentBase +from common import utils as common_utils +from case.performance_analysis import utils as performance_analysis_utils +from platform_tools.aliyun import ali_const +from common.utils import ssh_pool_default + +log = get_logger() + +case_name = "performance_analysis_nginx_http_with_agent" +case_info = {} +tool_params = ["42000", "38000", "34000"] + +def create_http_traffic_action( + nginx_ip, traffic_ip, param +): + ssh = ssh_pool_default.get(traffic_ip) + start_time = int(time.time()) + log.info("start generating http traffic") + _, _, stderr = ssh.exec_command( + f'''wrk2 -c20 -t20 -R {param} -d 100 -L http://{nginx_ip}:80/index.html | grep -E "(Latency Distribution|Requests/sec)" -A 8 | grep -E "^( 50.000| 90.000|Requests/sec:)"| awk '{{print $2}}' > traffic_result.log''' + ) + err = stderr.readlines() + if err: + log.error(f"wrk2 err, log:{err}") + log.info("complete http traffic generation") + end_time = int(time.time()) + return start_time, end_time + +class TestPerformanceAnalysisNginxHttpWithAgent(): + + @classmethod + def setup_class(cls): + uuid = common_utils.get_case_uuid() + cls.instance_name_agent = f"{case_name}-agent-{uuid}" + cls.instance_name_traffic = f"{case_name}-traffic-{uuid}" + cls.agent_name = f"{case_name}_{uuid}" + cls.result = AgentResults(case_name=case_name) + cls.result.add_case_info(info=case_info) + cls.agent:AgentBase = None + + @classmethod + def teardown_class(cls): + cls.result.generate_yaml_file() + cls.agent.clear() + + @allure.suite('performance analysis') + @allure.epic('Agent performance analysis') + @allure.feature('') + @allure.title('Agent性能分析 - http') + @allure.description('Test the performance of the agent on the http protocol') + @pytest.mark.medium + def test_performance_analysis_nginx_http_with_agent(self): + with allure_step('step 1: create instance'): + Platform = choose_platform() + if Platform: + instance_info = Platform.create_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic], + image_id=ali_const.ali_image_id_performance_analysis, + ) + else: + log.info("no platform, use default ip") + instance_info = { + self.instance_name_agent: common_utils.get_fixed_host_ip(self.instance_name_agent), + self.instance_name_traffic: common_utils.get_fixed_host_ip(self.instance_name_traffic) + } + agent_ip = instance_info[self.instance_name_agent] + 
traffic_ip = instance_info[self.instance_name_traffic] + with allure_step('step 2: install agent'): + Agent = choose_agent() + TestPerformanceAnalysisNginxHttpWithAgent.agent = Agent() + agent_meta = common_utils.get_meta_data(agent_ip) + TestPerformanceAnalysisNginxHttpWithAgent.agent.init( + agent_name=self.agent_name, + meta=agent_meta + ) + self.agent.deploy() + + with allure_step('step 3: sync agent'): + self.agent.start() + self.agent.ensure_agent_status_available() + self.agent.configure_agent() + log.info("wait 120s") + time.sleep(120) + + with allure_step('step 4: update telegraf config and start nginx'): + common_utils.upload_files( + vm_ip=agent_ip, + local_path="case/performance_analysis/tools/telegraf.conf", + remote_path="telegraf.conf", + ssh_pool=self.agent.get_ssh_pool(), + ) + performance_analysis_utils.reload_telegraf_conf( + vm_ip=agent_ip, + ssh_pool=self.agent.get_ssh_pool() + ) + + common_utils.ensure_process_running( + vm_ip=agent_ip, + process_name="nginx", + ssh_pool=self.agent.get_ssh_pool(), + ) + + for i in range(len(tool_params)): + param = tool_params[i] + result_data = {} + with allure_step(f'step 5.{i}: start wrk2 traffic tool, rate {param}'): + result_data["case.command"] = f"wrk2 -c20 -t20 -R {param} -d 100 -L http://{agent_ip}:80/index.html" + result_data["server.rate"] = param + start_time, end_time = create_http_traffic_action( + nginx_ip=agent_ip, + traffic_ip=traffic_ip, + param=param, + ) + time.sleep(30) + # 打流工具的数据 + wrk2_result_data = performance_analysis_utils.get_traffic_tool_data( + vm_ip=traffic_ip, + ) + result_data.update(wrk2_result_data) + log.info(wrk2_result_data) + # telegraf采集的数据 + monitored_process_name = ["nginx", self.agent.AGENT_PROCESS_NAME] + telegraf_result_data = performance_analysis_utils.get_process_usage_by_telegraf( + vm_ip=agent_ip, + process_name_list=monitored_process_name, + start_time=start_time, + end_time=end_time, + ) + result_data.update(telegraf_result_data) + log.info(telegraf_result_data) + # agent_result_data = agent.get_metric_data_by_agent(start_time=start_time, end_time=end_time) + # self.result.add_result_data(data=agent_result_data) + # log.info(agent_result_data) + self.result.add_result_data(data=result_data, index=i) + with allure_step('step n: delete instance and clear'): + if Platform: + Platform.delete_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic] + ) + else: + log.info("no platform, use default ip, no delete") + + + diff --git a/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_without_agent.py b/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_without_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..3296cdebe47ff1f89e65543e342ba8d55e3b040d --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/nginx/test_nginx_without_agent.py @@ -0,0 +1,125 @@ +import pytest +import allure,time +from common.utils import step as allure_step +from common.utils import choose_platform +from eval_lib.common.logger import get_logger +from common.results import AgentResults +from common import utils as common_utils +from case.performance_analysis import utils as performance_analysis_utils +from platform_tools.aliyun import ali_const +from common.utils import ssh_pool_default + +log = get_logger() + +case_name = "performance_analysis_nginx_http_without_agent" +case_info = {} +tool_params = ["42000", "38000", "34000"] + +def create_http_traffic_action( + nginx_ip, traffic_ip, 
param +): + ssh = ssh_pool_default.get(traffic_ip) + start_time = int(time.time()) + log.info("start generating http traffic") + _, _, stderr = ssh.exec_command( + f'''wrk2 -c20 -t20 -R {param} -d 100 -L http://{nginx_ip}:80/index.html | grep -E "(Latency Distribution|Requests/sec)" -A 8 | grep -E "^( 50.000| 90.000|Requests/sec:)"| awk '{{print $2}}' > traffic_result.log''' + ) + err = stderr.readlines() + if err: + log.error(f"wrk2 err, log:{err}") + log.info("complete http traffic generation") + end_time = int(time.time()) + return start_time, end_time + +class TestPerformanceAnalysisNginxHttpWithoutAgent(): + + @classmethod + def setup_class(cls): + uuid = common_utils.get_case_uuid() + cls.instance_name_agent = f"{case_name}-agent-{uuid}" + cls.instance_name_traffic = f"{case_name}-traffic-{uuid}" + cls.result = AgentResults(case_name=case_name) + cls.result.add_case_info(info=case_info) + + @classmethod + def teardown_class(cls): + cls.result.generate_yaml_file() + + @allure.suite('performance analysis') + @allure.epic('Agent performance analysis') + @allure.feature('') + @allure.title('Agent性能分析无agent - http') + @allure.description('Test the performance of the agent on the http protocol') + @pytest.mark.medium + def test_performance_analysis_nginx_http_without_agent(self): + with allure_step('step 1: create instance'): + Platform = choose_platform() + if Platform: + instance_info = Platform.create_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic], + image_id=ali_const.ali_image_id_performance_analysis, + ) + else: + log.info("no platform, use default ip") + instance_info = { + self.instance_name_agent: common_utils.get_fixed_host_ip(self.instance_name_agent), + self.instance_name_traffic: common_utils.get_fixed_host_ip(self.instance_name_traffic) + } + agent_ip = instance_info[self.instance_name_agent] + traffic_ip = instance_info[self.instance_name_traffic] + + with allure_step('step 2: update telegraf config and start nginx'): + common_utils.upload_files( + vm_ip=agent_ip, + local_path="case/performance_analysis/tools/telegraf.conf", + remote_path="telegraf.conf", + ssh_pool=ssh_pool_default, + ) + performance_analysis_utils.reload_telegraf_conf( + vm_ip=agent_ip, + ssh_pool=ssh_pool_default + ) + common_utils.ensure_process_running( + vm_ip=agent_ip, + process_name="nginx", + ssh_pool=ssh_pool_default, + ) + + for i in range(len(tool_params)): + param = tool_params[i] + result_data = {} + with allure_step(f'step 3.{i}: start wrk2 traffic tool, rate {param}'): + result_data["case.command"] = f"wrk2 -c20 -t20 -R {param} -d 100 -L http://{agent_ip}:80/index.html" + result_data["server.rate"] = param + start_time, end_time = create_http_traffic_action( + nginx_ip=agent_ip, + traffic_ip=traffic_ip, + param=param, + ) + time.sleep(30) + # 打流工具的数据 + wrk2_result_data = performance_analysis_utils.get_traffic_tool_data( + vm_ip=traffic_ip, + ) + result_data.update(wrk2_result_data) + log.info(wrk2_result_data) + # telegraf采集的数据 + monitored_process_name = ["nginx"] + telegraf_result_data = performance_analysis_utils.get_process_usage_by_telegraf( + vm_ip=agent_ip, + process_name_list=monitored_process_name, + start_time=start_time, + end_time=end_time, + ) + result_data.update(telegraf_result_data) + log.info(telegraf_result_data) + self.result.add_result_data(data=result_data, index=i) + with allure_step('step n: delete instance and clear'): + if Platform: + Platform.delete_instances( + instance_names=[self.instance_name_agent,self.instance_name_traffic] + ) 
+ else: + log.info("no platform, use default ip, no delete") + + diff --git a/evaluation/eval-runner/eval-runner/case/performance_analysis/tools/telegraf.conf b/evaluation/eval-runner/eval-runner/case/performance_analysis/tools/telegraf.conf new file mode 100644 index 0000000000000000000000000000000000000000..56e50b7a8d6efd0ba2cf2e31dc9387eff95c0cb5 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/tools/telegraf.conf @@ -0,0 +1,6687 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) + + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## Maximum number of unwritten metrics per output. Increasing this value + ## allows for longer periods of output downtime without dropping metrics at the + ## cost of higher maximum memory usage. + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. Maximum flush_interval will be + ## flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Log at debug level. + # debug = false + ## Log only error level messages. + # quiet = false + + ## Log target controls the destination for logs and can be one of "file", + ## "stderr" or, on Windows, "eventlog". 
When set to "file", the output file + ## is determined by the "logfile" setting. + # logtarget = "file" + + ## Name of the file to be logged to when using the "file" logtarget. If set to + ## the empty string then logs are written to stderr. + # logfile = "" + + ## The logfile will be rotated after the time interval specified. When set + ## to 0 no time based rotation is performed. Logs are rotated only when + ## written to, if there is no log activity rotation may be delayed. + # logfile_rotation_interval = "0d" + + ## The logfile will be rotated when it becomes larger than the specified + ## size. When set to 0 no size based rotation is performed. + # logfile_rotation_max_size = "0MB" + + ## Maximum number of rotated archives to keep, any older logs are deleted. + ## If set to -1, no archives are removed. + # logfile_rotation_max_archives = 5 + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + + +# Configuration for sending metrics to InfluxDB +[[outputs.influxdb]] + ## The full HTTP or UDP URL for your InfluxDB instance. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. + # urls = ["unix:///var/run/influxdb.sock"] + # urls = ["udp://127.0.0.1:8089"] + # urls = ["http://127.0.0.1:8086"] + + ## The target database for metrics; will be created as needed. + ## For UDP url endpoint database needs to be configured on server side. + # database = "telegraf" + + ## The value of this tag will be used to determine the database. If this + ## tag is not set the 'database' option is used as the default. + # database_tag = "" + + ## If true, the 'database_tag' will not be included in the written metric. + # exclude_database_tag = false + + ## If true, no CREATE DATABASE queries will be sent. Set to true when using + ## Telegraf with a user without permissions to create databases or when the + ## database already exists. + # skip_database_creation = false + + ## Name of existing retention policy to write to. Empty string writes to + ## the default retention policy. Only takes effect when using HTTP. + # retention_policy = "" + + ## The value of this tag will be used to determine the retention policy. If this + ## tag is not set the 'retention_policy' option is used as the default. + # retention_policy_tag = "" + + ## If true, the 'retention_policy_tag' will not be included in the written metric. + # exclude_retention_policy_tag = false + + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all". + ## Only takes effect when using HTTP. + # write_consistency = "any" + + ## Timeout for HTTP messages. + # timeout = "5s" + + ## HTTP Basic Auth + username = "root" + password = "" + + ## HTTP User-Agent + # user_agent = "telegraf" + + ## UDP payload size is the maximum packet size to send. + # udp_payload = "512B" + + ## Optional TLS Config for use on HTTP connections. + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## HTTP Proxy override, if unset values the standard proxy environment + ## variables are consulted to determine which proxy, if any, should be used. 
+ # http_proxy = "http://corporate.proxy:3128" + + ## Additional HTTP headers + # http_headers = {"X-Special-Header" = "Special-Value"} + + ## HTTP Content-Encoding for write request body, can be set to "gzip" to + ## compress body or "identity" to apply no encoding. + # content_encoding = "identity" + + ## When true, Telegraf will output unsigned integers as unsigned values, + ## i.e.: "42u". You will need a version of InfluxDB supporting unsigned + ## integer values. Enabling this option will result in field type errors if + ## existing data has been written. + # influx_uint_support = false + + +# # Configuration for Amon Server to send metrics to. +# [[outputs.amon]] +# ## Amon Server Key +# server_key = "my-server-key" # required. +# +# ## Amon Instance URL +# amon_instance = "https://youramoninstance" # required +# +# ## Connection timeout. +# # timeout = "5s" + + +# # Publishes metrics to an AMQP broker +# [[outputs.amqp]] +# ## Broker to publish to. +# ## deprecated in 1.7; use the brokers option +# # url = "amqp://localhost:5672/influxdb" +# +# ## Brokers to publish to. If multiple brokers are specified a random broker +# ## will be selected anytime a connection is established. This can be +# ## helpful for load balancing when not using a dedicated load balancer. +# brokers = ["amqp://localhost:5672/influxdb"] +# +# ## Maximum messages to send over a connection. Once this is reached, the +# ## connection is closed and a new connection is made. This can be helpful for +# ## load balancing when not using a dedicated load balancer. +# # max_messages = 0 +# +# ## Exchange to declare and publish to. +# exchange = "telegraf" +# +# ## Exchange type; common types are "direct", "fanout", "topic", "header", "x-consistent-hash". +# # exchange_type = "topic" +# +# ## If true, exchange will be passively declared. +# # exchange_passive = false +# +# ## Exchange durability can be either "transient" or "durable". +# # exchange_durability = "durable" +# +# ## Additional exchange arguments. +# # exchange_arguments = { } +# # exchange_arguments = {"hash_propery" = "timestamp"} +# +# ## Authentication credentials for the PLAIN auth_method. +# # username = "" +# # password = "" +# +# ## Auth method. PLAIN and EXTERNAL are supported +# ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as +# ## described here: https://www.rabbitmq.com/plugins.html +# # auth_method = "PLAIN" +# +# ## Metric tag to use as a routing key. +# ## ie, if this tag exists, its value will be used as the routing key +# # routing_tag = "host" +# +# ## Static routing key. Used when no routing_tag is set or as a fallback +# ## when the tag specified in routing tag is not found. +# # routing_key = "" +# # routing_key = "telegraf" +# +# ## Delivery Mode controls if a published message is persistent. +# ## One of "transient" or "persistent". +# # delivery_mode = "transient" +# +# ## InfluxDB database added as a message header. +# ## deprecated in 1.7; use the headers option +# # database = "telegraf" +# +# ## InfluxDB retention policy added as a message header +# ## deprecated in 1.7; use the headers option +# # retention_policy = "default" +# +# ## Static headers added to each published message. +# # headers = { } +# # headers = {"database" = "telegraf", "retention_policy" = "default"} +# +# ## Connection timeout. If not provided, will default to 5s. 0s means no +# ## timeout (not recommended). 
+# # timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## If true use batch serialization format instead of line based delimiting. +# ## Only applies to data formats which are not line based such as JSON. +# ## Recommended to set to true. +# # use_batch_format = false +# +# ## Content encoding for message payloads, can be set to "gzip" to or +# ## "identity" to apply no encoding. +# ## +# ## Please note that when use_batch_format = false each amqp message contains only +# ## a single metric, it is recommended to use compression with batch format +# ## for best results. +# # content_encoding = "identity" +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# # data_format = "influx" + + +# # Send metrics to Azure Application Insights +# [[outputs.application_insights]] +# ## Instrumentation key of the Application Insights resource. +# instrumentation_key = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxx" +# +# ## Timeout for closing (default: 5s). +# # timeout = "5s" +# +# ## Enable additional diagnostic logging. +# # enable_diagnostic_logging = false +# +# ## Context Tag Sources add Application Insights context tags to a tag value. +# ## +# ## For list of allowed context tag keys see: +# ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go +# # [outputs.application_insights.context_tag_sources] +# # "ai.cloud.role" = "kubernetes_container_name" +# # "ai.cloud.roleInstance" = "kubernetes_pod_name" + + +# # Send aggregate metrics to Azure Monitor +# [[outputs.azure_monitor]] +# ## Timeout for HTTP writes. +# # timeout = "20s" +# +# ## Set the namespace prefix, defaults to "Telegraf/". +# # namespace_prefix = "Telegraf/" +# +# ## Azure Monitor doesn't have a string value type, so convert string +# ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows +# ## a maximum of 10 dimensions so Telegraf will only send the first 10 +# ## alphanumeric dimensions. +# # strings_as_dimensions = false +# +# ## Both region and resource_id must be set or be available via the +# ## Instance Metadata service on Azure Virtual Machines. +# # +# ## Azure Region to publish metrics against. +# ## ex: region = "southcentralus" +# # region = "" +# # +# ## The Azure Resource ID against which metric will be logged, e.g. +# ## ex: resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" +# # resource_id = "" +# +# ## Optionally, if in Azure US Government, China or other sovereign +# ## cloud environment, set appropriate REST endpoint for receiving +# ## metrics. (Note: region may be unused in this context) +# # endpoint_url = "https://monitoring.core.usgovcloudapi.net" + + +# # Publish Telegraf metrics to a Google Cloud PubSub topic +# [[outputs.cloud_pubsub]] +# ## Required. Name of Google Cloud Platform (GCP) Project that owns +# ## the given PubSub topic. +# project = "my-project" +# +# ## Required. Name of PubSub topic to publish metrics to. +# topic = "my-topic" +# +# ## Required. Data format to consume. +# ## Each data format has its own unique set of configuration options. 
+# ## Read more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# +# ## Optional. Filepath for GCP credentials JSON file to authorize calls to +# ## PubSub APIs. If not set explicitly, Telegraf will attempt to use +# ## Application Default Credentials, which is preferred. +# # credentials_file = "path/to/my/creds.json" +# +# ## Optional. If true, will send all metrics per write in one PubSub message. +# # send_batched = true +# +# ## The following publish_* parameters specifically configures batching +# ## requests made to the GCP Cloud PubSub API via the PubSub Golang library. Read +# ## more here: https://godoc.org/cloud.google.com/go/pubsub#PublishSettings +# +# ## Optional. Send a request to PubSub (i.e. actually publish a batch) +# ## when it has this many PubSub messages. If send_batched is true, +# ## this is ignored and treated as if it were 1. +# # publish_count_threshold = 1000 +# +# ## Optional. Send a request to PubSub (i.e. actually publish a batch) +# ## when it has this many PubSub messages. If send_batched is true, +# ## this is ignored and treated as if it were 1 +# # publish_byte_threshold = 1000000 +# +# ## Optional. Specifically configures requests made to the PubSub API. +# # publish_num_go_routines = 2 +# +# ## Optional. Specifies a timeout for requests to the PubSub API. +# # publish_timeout = "30s" +# +# ## Optional. If true, published PubSub message data will be base64-encoded. +# # base64_data = false +# +# ## Optional. PubSub attributes to add to metrics. +# # [[inputs.pubsub.attributes]] +# # my_attr = "tag_value" + + +# # Configuration for AWS CloudWatch output. +# [[outputs.cloudwatch]] +# ## Amazon REGION +# region = "us-east-1" +# +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile +# #access_key = "" +# #secret_key = "" +# #token = "" +# #role_arn = "" +# #profile = "" +# #shared_credential_file = "" +# +# ## Endpoint to make request against, the correct endpoint is automatically +# ## determined and this option should only be set if you wish to override the +# ## default. +# ## ex: endpoint_url = "http://localhost:8000" +# # endpoint_url = "" +# +# ## Namespace for the CloudWatch MetricDatums +# namespace = "InfluxData/Telegraf" +# +# ## If you have a large amount of metrics, you should consider to send statistic +# ## values instead of raw metrics which could not only improve performance but +# ## also save AWS API cost. If enable this flag, this plugin would parse the required +# ## CloudWatch statistic fields (count, min, max, and sum) and send them to CloudWatch. +# ## You could use basicstats aggregator to calculate those fields. If not all statistic +# ## fields are available, all fields would still be sent as raw metrics. +# # write_statistics = false +# +# ## Enable high resolution metrics of 1 second (if not enabled, standard resolution are of 60 seconds precision) +# # high_resolution_metrics = false + + +# # Configuration for CrateDB to send metrics to. +# [[outputs.cratedb]] +# # A github.com/jackc/pgx connection string. +# # See https://godoc.org/github.com/jackc/pgx#ParseDSN +# url = "postgres://user:password@localhost/schema?sslmode=disable" +# # Timeout for all CrateDB queries. 
+# timeout = "5s" +# # Name of the table to store metrics in. +# table = "metrics" +# # If true, and the metrics table does not exist, create it automatically. +# table_create = true + + +# # Configuration for DataDog API to send metrics to. +# [[outputs.datadog]] +# ## Datadog API key +# apikey = "my-secret-key" # required. +# +# # The base endpoint URL can optionally be specified but it defaults to: +# #url = "https://app.datadoghq.com/api/v1/series" +# +# ## Connection timeout. +# # timeout = "5s" + + +# # Send metrics to nowhere at all +# [[outputs.discard]] +# # no configuration + + +# # Configuration for Elasticsearch to send metrics to. +# [[outputs.elasticsearch]] +# ## The full HTTP endpoint URL for your Elasticsearch instance +# ## Multiple urls can be specified as part of the same cluster, +# ## this means that only ONE of the urls will be written to each interval. +# urls = [ "http://node1.es.example.com:9200" ] # required. +# ## Elasticsearch client timeout, defaults to "5s" if not set. +# timeout = "5s" +# ## Set to true to ask Elasticsearch a list of all cluster nodes, +# ## thus it is not necessary to list all nodes in the urls config option. +# enable_sniffer = false +# ## Set the interval to check if the Elasticsearch nodes are available +# ## Setting to "0s" will disable the health check (not recommended in production) +# health_check_interval = "10s" +# ## HTTP basic authentication details +# # username = "telegraf" +# # password = "mypassword" +# +# ## Index Config +# ## The target index for metrics (Elasticsearch will create if it not exists). +# ## You can use the date specifiers below to create indexes per time frame. +# ## The metric timestamp will be used to decide the destination index name +# # %Y - year (2016) +# # %y - last two digits of year (00..99) +# # %m - month (01..12) +# # %d - day of month (e.g., 01) +# # %H - hour (00..23) +# # %V - week of the year (ISO week) (01..53) +# ## Additionally, you can specify a tag name using the notation {{tag_name}} +# ## which will be used as part of the index name. If the tag does not exist, +# ## the default tag value will be used. +# # index_name = "telegraf-{{host}}-%Y.%m.%d" +# # default_tag_value = "none" +# index_name = "telegraf-%Y.%m.%d" # required. +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Template Config +# ## Set to true if you want telegraf to manage its index template. +# ## If enabled it will create a recommended index template for telegraf indexes +# manage_template = true +# ## The template name used for telegraf indexes +# template_name = "telegraf" +# ## Set to true if you want telegraf to overwrite an existing template +# overwrite_template = false + + +# # Send metrics to command as input over stdin +# [[outputs.exec]] +# ## Command to ingest metrics via stdin. +# command = ["tee", "-a", "/dev/null"] +# +# ## Timeout for command to complete. +# # timeout = "5s" +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# # data_format = "influx" + + +# # Send telegraf metrics to file(s) +# [[outputs.file]] +# ## Files to write to, "stdout" is a specially handled file. 
+# files = ["stdout", "/tmp/metrics.out"] +# +# ## Use batch serialization format instead of line based delimiting. The +# ## batch format allows for the production of non line based output formats and +# ## may more effiently encode metric groups. +# # use_batch_format = false +# +# ## The file will be rotated after the time interval specified. When set +# ## to 0 no time based rotation is performed. +# # rotation_interval = "0d" +# +# ## The logfile will be rotated when it becomes larger than the specified +# ## size. When set to 0 no size based rotation is performed. +# # rotation_max_size = "0MB" +# +# ## Maximum number of rotated archives to keep, any older logs are deleted. +# ## If set to -1, no archives are removed. +# # rotation_max_archives = 5 +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for Graphite server to send metrics to +# [[outputs.graphite]] +# ## TCP endpoint for your graphite instance. +# ## If multiple endpoints are configured, output will be load balanced. +# ## Only one of the endpoints will be written to with each iteration. +# servers = ["localhost:2003"] +# ## Prefix metrics name +# prefix = "" +# ## Graphite output template +# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# template = "host.tags.measurement.field" +# +# ## Enable Graphite tags support +# # graphite_tag_support = false +# +# ## timeout in seconds for the write connection to graphite +# timeout = 2 +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Send telegraf metrics to graylog +# [[outputs.graylog]] +# ## UDP endpoint for your graylog instance. +# servers = ["127.0.0.1:12201"] +# +# ## The field to use as the GELF short_message, if unset the static string +# ## "telegraf" will be used. +# ## example: short_message_field = "message" +# # short_message_field = "" + + +# # Configurable HTTP health check resource based on metrics +# [[outputs.health]] +# ## Address and port to listen on. +# ## ex: service_address = "http://localhost:8080" +# ## service_address = "unix:///var/run/telegraf-health.sock" +# # service_address = "http://:8080" +# +# ## The maximum duration for reading the entire request. +# # read_timeout = "5s" +# ## The maximum duration for writing the entire response. +# # write_timeout = "5s" +# +# ## Username and password to accept for HTTP basic authentication. +# # basic_username = "user1" +# # basic_password = "secret" +# +# ## Allowed CA certificates for client certificates. +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## TLS server certificate and private key. +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## One or more check sub-tables should be defined, it is also recommended to +# ## use metric filtering to limit the metrics that flow into this output. +# ## +# ## When using the default buffer sizes, this example will fail when the +# ## metric buffer is half full. 
+# ## +# ## namepass = ["internal_write"] +# ## tagpass = { output = ["influxdb"] } +# ## +# ## [[outputs.health.compares]] +# ## field = "buffer_size" +# ## lt = 5000.0 +# ## +# ## [[outputs.health.contains]] +# ## field = "buffer_size" + + +# # A plugin that can transmit metrics over HTTP +# [[outputs.http]] +# ## URL is the address to send metrics to +# url = "http://127.0.0.1:8080/telegraf" +# +# ## Timeout for HTTP message +# # timeout = "5s" +# +# ## HTTP method, one of: "POST" or "PUT" +# # method = "POST" +# +# ## HTTP Basic Auth credentials +# # username = "username" +# # password = "pa$$word" +# +# ## OAuth2 Client Credentials Grant +# # client_id = "clientid" +# # client_secret = "secret" +# # token_url = "https://indentityprovider/oauth2/v1/token" +# # scopes = ["urn:opc:idm:__myscopes__"] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# # data_format = "influx" +# +# ## HTTP Content-Encoding for write request body, can be set to "gzip" to +# ## compress body or "identity" to apply no encoding. +# # content_encoding = "identity" +# +# ## Additional HTTP headers +# # [outputs.http.headers] +# # # Should be set manually to "application/json" for json data_format +# # Content-Type = "text/plain; charset=utf-8" + + +# # Configuration for sending metrics to InfluxDB +# [[outputs.influxdb_v2]] +# ## The URLs of the InfluxDB cluster nodes. +# ## +# ## Multiple URLs can be specified for a single cluster, only ONE of the +# ## urls will be written to each interval. +# ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] +# urls = ["http://127.0.0.1:9999"] +# +# ## Token for authentication. +# token = "" +# +# ## Organization is the name of the organization you wish to write to; must exist. +# organization = "" +# +# ## Destination bucket to write into. +# bucket = "" +# +# ## The value of this tag will be used to determine the bucket. If this +# ## tag is not set the 'bucket' option is used as the default. +# # bucket_tag = "" +# +# ## If true, the bucket tag will not be added to the metric. +# # exclude_bucket_tag = false +# +# ## Timeout for HTTP messages. +# # timeout = "5s" +# +# ## Additional HTTP headers +# # http_headers = {"X-Special-Header" = "Special-Value"} +# +# ## HTTP Proxy override, if unset values the standard proxy environment +# ## variables are consulted to determine which proxy, if any, should be used. +# # http_proxy = "http://corporate.proxy:3128" +# +# ## HTTP User-Agent +# # user_agent = "telegraf" +# +# ## Content-Encoding for write request body, can be set to "gzip" to +# ## compress body or "identity" to apply no encoding. +# # content_encoding = "gzip" +# +# ## Enable or disable uint support for writing uints influxdb 2.0. +# # influx_uint_support = false +# +# ## Optional TLS Config for use on HTTP connections. 
+# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Configuration for sending metrics to an Instrumental project +# [[outputs.instrumental]] +# ## Project API Token (required) +# api_token = "API Token" # required +# ## Prefix the metrics with a given name +# prefix = "" +# ## Stats output template (Graphite formatting) +# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite +# template = "host.tags.measurement.field" +# ## Timeout in seconds to connect +# timeout = "2s" +# ## Display Communcation to Instrumental +# debug = false + + +# # Configuration for the Kafka server to send metrics to +# [[outputs.kafka]] +# ## URLs of kafka brokers +# brokers = ["localhost:9092"] +# ## Kafka topic for producer messages +# topic = "telegraf" +# +# ## The value of this tag will be used as the topic. If not set the 'topic' +# ## option is used. +# # topic_tag = "" +# +# ## If true, the 'topic_tag' will be removed from to the metric. +# # exclude_topic_tag = false +# +# ## Optional Client id +# # client_id = "Telegraf" +# +# ## Set the minimal supported Kafka version. Setting this enables the use of new +# ## Kafka features and APIs. Of particular interest, lz4 compression +# ## requires at least version 0.10.0.0. +# ## ex: version = "1.1.0" +# # version = "" +# +# ## Optional topic suffix configuration. +# ## If the section is omitted, no suffix is used. +# ## Following topic suffix methods are supported: +# ## measurement - suffix equals to separator + measurement's name +# ## tags - suffix equals to separator + specified tags' values +# ## interleaved with separator +# +# ## Suffix equals to "_" + measurement name +# # [outputs.kafka.topic_suffix] +# # method = "measurement" +# # separator = "_" +# +# ## Suffix equals to "__" + measurement's "foo" tag value. +# ## If there's no such a tag, suffix equals to an empty string +# # [outputs.kafka.topic_suffix] +# # method = "tags" +# # keys = ["foo"] +# # separator = "__" +# +# ## Suffix equals to "_" + measurement's "foo" and "bar" +# ## tag values, separated by "_". If there is no such tags, +# ## their values treated as empty strings. +# # [outputs.kafka.topic_suffix] +# # method = "tags" +# # keys = ["foo", "bar"] +# # separator = "_" +# +# ## The routing tag specifies a tagkey on the metric whose value is used as +# ## the message key. The message key is used to determine which partition to +# ## send the message to. This tag is prefered over the routing_key option. +# routing_tag = "host" +# +# ## The routing key is set as the message key and used to determine which +# ## partition to send the message to. This value is only used when no +# ## routing_tag is set or as a fallback when the tag specified in routing tag +# ## is not found. +# ## +# ## If set to "random", a random value will be generated for each message. +# ## +# ## When unset, no message key is added and each message is routed to a random +# ## partition. +# ## +# ## ex: routing_key = "random" +# ## routing_key = "telegraf" +# # routing_key = "" +# +# ## CompressionCodec represents the various compression codecs recognized by +# ## Kafka in messages. 
+# ## 0 : No compression +# ## 1 : Gzip compression +# ## 2 : Snappy compression +# ## 3 : LZ4 compression +# # compression_codec = 0 +# +# ## RequiredAcks is used in Produce Requests to tell the broker how many +# ## replica acknowledgements it must see before responding +# ## 0 : the producer never waits for an acknowledgement from the broker. +# ## This option provides the lowest latency but the weakest durability +# ## guarantees (some data will be lost when a server fails). +# ## 1 : the producer gets an acknowledgement after the leader replica has +# ## received the data. This option provides better durability as the +# ## client waits until the server acknowledges the request as successful +# ## (only messages that were written to the now-dead leader but not yet +# ## replicated will be lost). +# ## -1: the producer gets an acknowledgement after all in-sync replicas have +# ## received the data. This option provides the best durability, we +# ## guarantee that no messages will be lost as long as at least one in +# ## sync replica remains. +# # required_acks = -1 +# +# ## The maximum number of times to retry sending a metric before failing +# ## until the next flush. +# # max_retry = 3 +# +# ## The maximum permitted size of a message. Should be set equal to or +# ## smaller than the broker's 'message.max.bytes'. +# # max_message_bytes = 1000000 +# +# ## Optional TLS Config +# # enable_tls = true +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Optional SASL Config +# # sasl_username = "kafka" +# # sasl_password = "secret" +# +# ## SASL protocol version. When connecting to Azure EventHub set to 0. +# # sasl_version = 1 +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# # data_format = "influx" + + +# # Configuration for the AWS Kinesis output. +# [[outputs.kinesis]] +# ## Amazon REGION of kinesis endpoint. +# region = "ap-southeast-2" +# +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile +# #access_key = "" +# #secret_key = "" +# #token = "" +# #role_arn = "" +# #profile = "" +# #shared_credential_file = "" +# +# ## Endpoint to make request against, the correct endpoint is automatically +# ## determined and this option should only be set if you wish to override the +# ## default. +# ## ex: endpoint_url = "http://localhost:8000" +# # endpoint_url = "" +# +# ## Kinesis StreamName must exist prior to starting telegraf. +# streamname = "StreamName" +# ## DEPRECATED: PartitionKey as used for sharding data. +# partitionkey = "PartitionKey" +# ## DEPRECATED: If set the paritionKey will be a random UUID on every put. +# ## This allows for scaling across multiple shards in a stream. +# ## This will cause issues with ordering. 
+# use_random_partitionkey = false
+# ## The partition key can be calculated using one of several methods:
+# ##
+# ## Use a static value for all writes:
+# # [outputs.kinesis.partition]
+# # method = "static"
+# # key = "howdy"
+# #
+# ## Use a random partition key on each write:
+# # [outputs.kinesis.partition]
+# # method = "random"
+# #
+# ## Use the measurement name as the partition key:
+# # [outputs.kinesis.partition]
+# # method = "measurement"
+# #
+# ## Use the value of a tag for all writes; if the tag is not set, the empty
+# ## default option will be used. When no default, defaults to "telegraf"
+# # [outputs.kinesis.partition]
+# # method = "tag"
+# # key = "host"
+# # default = "mykey"
+#
+#
+# ## Data format to output.
+# ## Each data format has its own unique set of configuration options, read
+# ## more about them here:
+# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
+# data_format = "influx"
+#
+# ## debug will show upstream aws messages.
+# debug = false
+
+
+# # Configuration for Librato API to send metrics to.
+# [[outputs.librato]]
+# ## Librato API Docs
+# ## http://dev.librato.com/v1/metrics-authentication
+# ## Librato API user
+# api_user = "telegraf@influxdb.com" # required.
+# ## Librato API token
+# api_token = "my-secret-token" # required.
+# ## Debug
+# # debug = false
+# ## Connection timeout.
+# # timeout = "5s"
+# ## Output source Template (same as graphite buckets)
+# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite
+# ## This template is used in librato's source (not metric's name)
+# template = "host"
+#
+
+
+# # Configuration for MQTT server to send metrics to
+# [[outputs.mqtt]]
+# servers = ["localhost:1883"] # required.
+#
+# ## MQTT outputs send metrics to this topic format
+# ## "<topic_prefix>/<hostname>/<pluginname>"
+# ## ex: prefix/web01.example.com/mem
+# topic_prefix = "telegraf"
+#
+# ## QoS policy for messages
+# ## 0 = at most once
+# ## 1 = at least once
+# ## 2 = exactly once
+# # qos = 2
+#
+# ## username and password to connect to the MQTT server.
+# # username = "telegraf"
+# # password = "metricsmetricsmetricsmetrics"
+#
+# ## client ID, if not set a random ID is generated
+# # client_id = ""
+#
+# ## Timeout for write operations. default: 5s
+# # timeout = "5s"
+#
+# ## Optional TLS Config
+# # tls_ca = "/etc/telegraf/ca.pem"
+# # tls_cert = "/etc/telegraf/cert.pem"
+# # tls_key = "/etc/telegraf/key.pem"
+# ## Use TLS but skip chain & host verification
+# # insecure_skip_verify = false
+#
+# ## When true, metrics will be sent in one MQTT message per flush. Otherwise,
+# ## metrics are written one metric per MQTT message.
+# # batch = false
+#
+# ## When true, metrics will have the RETAIN flag set, making broker cache entries until someone
+# ## actually reads it
+# # retain = false
+#
+# ## Data format to output.
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Send telegraf measurements to NATS +# [[outputs.nats]] +# ## URLs of NATS servers +# servers = ["nats://localhost:4222"] +# +# ## Optional credentials +# # username = "" +# # password = "" +# +# ## Optional NATS 2.0 and NATS NGS compatible user credentials +# # credentials = "/etc/telegraf/nats.creds" +# +# ## NATS subject for producer messages +# subject = "telegraf" +# +# ## Use Transport Layer Security +# # secure = false +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Send telegraf measurements to NSQD +# [[outputs.nsq]] +# ## Location of nsqd instance listening on TCP +# server = "localhost:4150" +# ## NSQ topic for producer messages +# topic = "telegraf" +# +# ## Data format to output. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for OpenTSDB server to send metrics to +# [[outputs.opentsdb]] +# ## prefix for metrics keys +# prefix = "my.specific.prefix." +# +# ## DNS name of the OpenTSDB server +# ## Using "opentsdb.example.com" or "tcp://opentsdb.example.com" will use the +# ## telnet API. "http://opentsdb.example.com" will use the Http API. +# host = "opentsdb.example.com" +# +# ## Port of the OpenTSDB server +# port = 4242 +# +# ## Number of data points to send to OpenTSDB in Http requests. +# ## Not used with telnet API. +# http_batch_size = 50 +# +# ## URI Path for Http requests to OpenTSDB. +# ## Used in cases where OpenTSDB is located behind a reverse proxy. +# http_path = "/api/put" +# +# ## Debug true - Prints OpenTSDB communication +# debug = false +# +# ## Separator separates measurement name from field +# separator = "_" + + +# # Configuration for the Prometheus client to spawn +# [[outputs.prometheus_client]] +# ## Address to listen on +# listen = ":9273" +# +# ## Metric version controls the mapping from Telegraf metrics into +# ## Prometheus format. When using the prometheus input, use the same value in +# ## both plugins to ensure metrics are round-tripped without modification. +# ## +# ## example: metric_version = 1; deprecated in 1.13 +# ## metric_version = 2; recommended version +# # metric_version = 1 +# +# ## Use HTTP Basic Authentication. +# # basic_username = "Foo" +# # basic_password = "Bar" +# +# ## If set, the IP Ranges which are allowed to access metrics. +# ## ex: ip_range = ["192.168.0.0/24", "192.168.1.0/30"] +# # ip_range = [] +# +# ## Path to publish the metrics on. +# # path = "/metrics" +# +# ## Expiration interval for each metric. 0 == no expiration +# # expiration_interval = "60s" +# +# ## Collectors to enable, valid entries are "gocollector" and "process". +# ## If unset, both are enabled. +# # collectors_exclude = ["gocollector", "process"] +# +# ## Send string metrics as Prometheus labels. 
+# ## Unless set to false all string metrics will be sent as labels. +# # string_as_label = true +# +# ## If set, enable TLS with the given certificate. +# # tls_cert = "/etc/ssl/telegraf.crt" +# # tls_key = "/etc/ssl/telegraf.key" +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Export metric collection time. +# # export_timestamp = false + + +# # Configuration for the Riemann server to send metrics to +# [[outputs.riemann]] +# ## The full TCP or UDP URL of the Riemann server +# url = "tcp://localhost:5555" +# +# ## Riemann event TTL, floating-point time in seconds. +# ## Defines how long that an event is considered valid for in Riemann +# # ttl = 30.0 +# +# ## Separator to use between measurement and field name in Riemann service name +# ## This does not have any effect if 'measurement_as_attribute' is set to 'true' +# separator = "/" +# +# ## Set measurement name as Riemann attribute 'measurement', instead of prepending it to the Riemann service name +# # measurement_as_attribute = false +# +# ## Send string metrics as Riemann event states. +# ## Unless enabled all string metrics will be ignored +# # string_as_state = false +# +# ## A list of tag keys whose values get sent as Riemann tags. +# ## If empty, all Telegraf tag values will be sent as tags +# # tag_keys = ["telegraf","custom_tag"] +# +# ## Additional Riemann tags to send. +# # tags = ["telegraf-output"] +# +# ## Description for Riemann event +# # description_text = "metrics collected from telegraf" +# +# ## Riemann client write timeout, defaults to "5s" if not set. +# # timeout = "5s" + + +# # Configuration for the Riemann server to send metrics to +# [[outputs.riemann_legacy]] +# ## URL of server +# url = "localhost:5555" +# ## transport protocol to use either tcp or udp +# transport = "tcp" +# ## separator to use between input name and field name in Riemann service name +# separator = " " + + +# # Generic socket writer capable of handling multiple socket types. +# [[outputs.socket_writer]] +# ## URL to connect to +# # address = "tcp://127.0.0.1:8094" +# # address = "tcp://example.com:http" +# # address = "tcp4://127.0.0.1:8094" +# # address = "tcp6://127.0.0.1:8094" +# # address = "tcp6://[2001:db8::1]:8094" +# # address = "udp://127.0.0.1:8094" +# # address = "udp4://127.0.0.1:8094" +# # address = "udp6://127.0.0.1:8094" +# # address = "unix:///tmp/telegraf.sock" +# # address = "unixgram:///tmp/telegraf.sock" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Period between keep alive probes. +# ## Only applies to TCP sockets. +# ## 0 disables keep alive probes. +# ## Defaults to the OS configuration. +# # keep_alive_period = "5m" +# +# ## Data format to generate. 
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# # data_format = "influx" + + +# # Configuration for Google Cloud Stackdriver to send metrics to +# [[outputs.stackdriver]] +# ## GCP Project +# project = "erudite-bloom-151019" +# +# ## The namespace for the metric descriptor +# namespace = "telegraf" +# +# ## Custom resource type +# # resource_type = "generic_node" +# +# ## Additonal resource labels +# # [outputs.stackdriver.resource_labels] +# # node_id = "$HOSTNAME" +# # namespace = "myapp" +# # location = "eu-north0" + + +# # Configuration for Syslog server to send metrics to +# [[outputs.syslog]] +# ## URL to connect to +# ## ex: address = "tcp://127.0.0.1:8094" +# ## ex: address = "tcp4://127.0.0.1:8094" +# ## ex: address = "tcp6://127.0.0.1:8094" +# ## ex: address = "tcp6://[2001:db8::1]:8094" +# ## ex: address = "udp://127.0.0.1:8094" +# ## ex: address = "udp4://127.0.0.1:8094" +# ## ex: address = "udp6://127.0.0.1:8094" +# address = "tcp://127.0.0.1:8094" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Period between keep alive probes. +# ## Only applies to TCP sockets. +# ## 0 disables keep alive probes. +# ## Defaults to the OS configuration. +# # keep_alive_period = "5m" +# +# ## The framing technique with which it is expected that messages are +# ## transported (default = "octet-counting"). Whether the messages come +# ## using the octect-counting (RFC5425#section-4.3.1, RFC6587#section-3.4.1), +# ## or the non-transparent framing technique (RFC6587#section-3.4.2). Must +# ## be one of "octet-counting", "non-transparent". +# # framing = "octet-counting" +# +# ## The trailer to be expected in case of non-trasparent framing (default = "LF"). +# ## Must be one of "LF", or "NUL". +# # trailer = "LF" +# +# ## SD-PARAMs settings +# ## Syslog messages can contain key/value pairs within zero or more +# ## structured data sections. For each unrecognised metric tag/field a +# ## SD-PARAMS is created. +# ## +# ## Example: +# ## [[outputs.syslog]] +# ## sdparam_separator = "_" +# ## default_sdid = "default@32473" +# ## sdids = ["foo@123", "bar@456"] +# ## +# ## input => xyzzy,x=y foo@123_value=42,bar@456_value2=84,something_else=1 +# ## output (structured data only) => [foo@123 value=42][bar@456 value2=84][default@32473 something_else=1 x=y] +# +# ## SD-PARAMs separator between the sdid and tag/field key (default = "_") +# # sdparam_separator = "_" +# +# ## Default sdid used for tags/fields that don't contain a prefix defined in +# ## the explict sdids setting below If no default is specified, no SD-PARAMs +# ## will be used for unrecognised field. +# # default_sdid = "default@32473" +# +# ## List of explicit prefixes to extract from tag/field keys and use as the +# ## SDID, if they match (see above example for more details): +# # sdids = ["foo@123", "bar@456"] +# +# ## Default severity value. Severity and Facility are used to calculate the +# ## message PRI value (RFC5424#section-6.2.1). Used when no metric field +# ## with key "severity_code" is defined. If unset, 5 (notice) is the default +# # default_severity_code = 5 +# +# ## Default facility value. Facility and Severity are used to calculate the +# ## message PRI value (RFC5424#section-6.2.1). 
Used when no metric field with +# ## key "facility_code" is defined. If unset, 1 (user-level) is the default +# # default_facility_code = 1 +# +# ## Default APP-NAME value (RFC5424#section-6.2.5) +# ## Used when no metric tag with key "appname" is defined. +# ## If unset, "Telegraf" is the default +# # default_appname = "Telegraf" + + +# # Write metrics to Warp 10 +# [[outputs.warp10]] +# # Prefix to add to the measurement. +# prefix = "telegraf." +# +# # URL of the Warp 10 server +# warp_url = "http://localhost:8080" +# +# # Write token to access your app on warp 10 +# token = "Token" +# +# # Warp 10 query timeout +# # timeout = "15s" +# +# ## Print Warp 10 error body +# # print_error_body = false +# +# ## Max string error size +# # max_string_error_size = 511 +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Configuration for Wavefront server to send metrics to +# [[outputs.wavefront]] +# ## Url for Wavefront Direct Ingestion or using HTTP with Wavefront Proxy +# ## If using Wavefront Proxy, also specify port. example: http://proxyserver:2878 +# url = "https://metrics.wavefront.com" +# +# ## Authentication Token for Wavefront. Only required if using Direct Ingestion +# #token = "DUMMY_TOKEN" +# +# ## DNS name of the wavefront proxy server. Do not use if url is specified +# #host = "wavefront.example.com" +# +# ## Port that the Wavefront proxy server listens on. Do not use if url is specified +# #port = 2878 +# +# ## prefix for metrics keys +# #prefix = "my.specific.prefix." +# +# ## whether to use "value" for name of simple fields. default is false +# #simple_fields = false +# +# ## character to use between metric and field name. default is . (dot) +# #metric_separator = "." +# +# ## Convert metric name paths to use metricSeparator character +# ## When true will convert all _ (underscore) characters in final metric name. default is true +# #convert_paths = true +# +# ## Use Strict rules to sanitize metric and tag names from invalid characters +# ## When enabled forward slash (/) and comma (,) will be accpeted +# #use_strict = false +# +# ## Use Regex to sanitize metric and tag names from invalid characters +# ## Regex is more thorough, but significantly slower. default is false +# #use_regex = false +# +# ## point tags to use as the source name for Wavefront (if none found, host will be used) +# #source_override = ["hostname", "address", "agent_host", "node_host"] +# +# ## whether to convert boolean values to numeric values, with false -> 0.0 and true -> 1.0. default is true +# #convert_bool = true +# +# ## Define a mapping, namespaced by metric prefix, from string values to numeric values +# ## deprecated in 1.9; use the enum processor plugin +# #[[outputs.wavefront.string_to_number.elasticsearch]] +# # green = 1.0 +# # yellow = 0.5 +# # red = 0.0 + + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + + +# # Clone metrics and apply modifications. 
+# [[processors.clone]]
+# ## All modifications on inputs and aggregators can be overridden:
+# # name_override = "new_name"
+# # name_prefix = "new_name_prefix"
+# # name_suffix = "new_name_suffix"
+#
+# ## Tags to be added (all values must be strings)
+# # [processors.clone.tags]
+# # additional_tag = "tag_value"
+
+
+# # Convert values to another metric value type
+# [[processors.converter]]
+# ## Tags to convert
+# ##
+# ## The table key determines the target type, and the array of key-values
+# ## select the keys to convert. The array may contain globs.
+# ## <target-type> = [<tag-key>...]
+# [processors.converter.tags]
+# measurement = []
+# string = []
+# integer = []
+# unsigned = []
+# boolean = []
+# float = []
+#
+# ## Fields to convert
+# ##
+# ## The table key determines the target type, and the array of key-values
+# ## select the keys to convert. The array may contain globs.
+# ## <target-type> = [<field-key>...]
+# [processors.converter.fields]
+# measurement = []
+# tag = []
+# string = []
+# integer = []
+# unsigned = []
+# boolean = []
+# float = []
+
+
+# # Dates measurements, tags, and fields that pass through this filter.
+# [[processors.date]]
+# ## New tag to create
+# tag_key = "month"
+#
+# ## Date format string, must be a representation of the Go "reference time"
+# ## which is "Mon Jan 2 15:04:05 -0700 MST 2006".
+# date_format = "Jan"
+#
+# ## Offset duration added to the date string when writing the new tag.
+# # date_offset = "0s"
+#
+# ## Timezone to use when creating the tag. This can be set to one of
+# ## "UTC", "Local", or to a location name in the IANA Time Zone database.
+# ## example: timezone = "America/Los_Angeles"
+# # timezone = "UTC"
+
+
+# # Filter metrics with repeating field values
+# [[processors.dedup]]
+# ## Maximum time to suppress output
+# dedup_interval = "600s"
+
+
+# # Map enum values according to given table.
+# [[processors.enum]]
+# [[processors.enum.mapping]]
+# ## Name of the field to map
+# field = "status"
+#
+# ## Name of the tag to map
+# # tag = "status"
+#
+# ## Destination tag or field to be used for the mapped value. By default the
+# ## source tag or field is used, overwriting the original value.
+# dest = "status_code"
+#
+# ## Default value to be used for all values not contained in the mapping
+# ## table. When unset, the unmodified value for the field will be used if no
+# ## match is found.
+# # default = 0
+#
+# ## Table of mappings
+# [processors.enum.mapping.value_mappings]
+# green = 1
+# amber = 2
+# red = 3
+
+
+# # Apply metric modifications using override semantics.
+# [[processors.override]]
+# ## All modifications on inputs and aggregators can be overridden:
+# # name_override = "new_name"
+# # name_prefix = "new_name_prefix"
+# # name_suffix = "new_name_suffix"
+#
+# ## Tags to be added (all values must be strings)
+# # [processors.override.tags]
+# # additional_tag = "tag_value"
+
+
+# # Parse a value in a specified field/tag(s) and add the result in a new metric
+# [[processors.parser]]
+# ## The name of the fields whose value will be parsed.
+# parse_fields = []
+#
+# ## If true, incoming metrics are not emitted.
+# drop_original = false
+#
+# ## If set to override, emitted metrics will be merged by overriding the
+# ## original metric using the newly parsed metrics.
+# merge = "override"
+#
+# ## The data format to be read from files
+# ## Each data format has its own unique set of configuration options, read
+# ## more about them here:
+# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+# data_format = "influx"
+
+
+# # Rotate a single valued metric into a multi field metric
+# [[processors.pivot]]
+# ## Tag to use for naming the new field.
+# tag_key = "name"
+# ## Field to use as the value of the new field.
+# value_key = "value"
+
+
+# # Print all metrics that pass through this filter.
+# [[processors.printer]]
+
+
+# # Transforms tag and field values with regex pattern
+# [[processors.regex]]
+# ## Tag and field conversions defined in separate sub-tables
+# # [[processors.regex.tags]]
+# # ## Tag to change
+# # key = "resp_code"
+# # ## Regular expression to match on a tag value
+# # pattern = "^(\\d)\\d\\d$"
+# # ## Matches of the pattern will be replaced with this string. Use ${1}
+# # ## notation to use the text of the first submatch.
+# # replacement = "${1}xx"
+#
+# # [[processors.regex.fields]]
+# # ## Field to change
+# # key = "request"
+# # ## All the power of the Go regular expressions available here
+# # ## For example, named subgroups
+# # pattern = "^/api(?P<method>/[\\w/]+)\\S*"
+# # replacement = "${method}"
+# # ## If result_key is present, a new field will be created
+# # ## instead of changing the existing field
+# # result_key = "method"
+#
+# ## Multiple conversions may be applied for one field sequentially
+# ## Let's extract one more value
+# # [[processors.regex.fields]]
+# # key = "request"
+# # pattern = ".*category=(\\w+).*"
+# # replacement = "${1}"
+# # result_key = "search_category"
+
+
+# # Rename measurements, tags, and fields that pass through this filter.
+# [[processors.rename]]
+
+
+# # Add the S2 Cell ID as a tag based on latitude and longitude fields
+# [[processors.s2geo]]
+# ## The name of the lat and lon fields containing WGS-84 latitude and
+# ## longitude in decimal degrees.
+# # lat_field = "lat" +# # lon_field = "lon" +# +# ## New tag to create +# # tag_key = "s2_cell_id" +# +# ## Cell level (see https://s2geometry.io/resources/s2cell_statistics.html) +# # cell_level = 9 + + +# # Perform string processing on tags, fields, and measurements +# [[processors.strings]] +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Convert a field value to titlecase +# # [[processors.strings.titlecase]] +# # field = "status" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" +# +# ## Replace all non-overlapping instances of old with new +# # [[processors.strings.replace]] +# # measurement = "*" +# # old = ":" +# # new = "_" +# +# ## Trims strings based on width +# # [[processors.strings.left]] +# # field = "message" +# # width = 10 +# +# ## Decode a base64 encoded utf-8 string +# # [[processors.strings.base64decode]] +# # field = "message" + + +# # Restricts the number of tags that can pass through this filter and chooses which tags to preserve when over the limit. +# [[processors.tag_limit]] +# ## Maximum number of tags to preserve +# limit = 10 +# +# ## List of tags to preferentially preserve +# keep = ["foo", "bar", "baz"] + + +# # Uses a Go template to create a new tag +# [[processors.template]] +# ## Tag to set with the output of the template. +# tag = "topic" +# +# ## Go template used to create the tag value. In order to ease TOML +# ## escaping requirements, you may wish to use single quotes around the +# ## template string. +# template = '{{ .Tag "hostname" }}.{{ .Tag "level" }}' + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. 
If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + +# # Rotate multi field metric into several single field metrics +# [[processors.unpivot]] +# ## Tag to use for the name. +# tag_key = "name" +# ## Field to use for the name of the value. +# value_key = "value" + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Configures which basic stats to push as fields +# # stats = ["count", "min", "max", "mean", "stdev", "s2", "sum"] + + +# # Report the final metric of a series +# [[aggregators.final]] +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## The time that a series is not updated until considering it final. +# series_timeout = "5m" + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## If true, the histogram will be reset on flush instead +# ## of accumulating the results. +# reset = false +# +# ## Whether bucket values should be accumulated. If set to false, "gt" tag will be added. +# ## Defaults to true. +# cumulative = true +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## Right borders of buckets (with +Inf implicitly added). +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## Right borders of buckets (with +Inf implicitly added). +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. 
+# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Merge metrics into multifield metrics by series key +# [[aggregators.merge]] +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = true + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Count the occurrence of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + + +# Read metrics about cpu usage +[[inputs.cpu]] + ## Whether to report per-cpu stats or not + percpu = true + ## Whether to report total system cpu stats or not + totalcpu = true + ## If true, collect raw CPU time metrics. + collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. + report_active = false + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] + + +# Read metrics about disk IO by device +[[inputs.diskio]] + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb", "vd*"] + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + ## Note: Most, but not all, udev properties can be accessed this way. Properties + ## that are currently inaccessible include DEVTYPE, DEVNAME, and DEVPATH. + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. 
+ # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] + + +# Get kernel statistics from /proc/stat +[[inputs.kernel]] + # no configuration + + +# Read metrics about memory usage +[[inputs.mem]] + # no configuration + + +# Get the number of processes and group them by status +[[inputs.processes]] + # no configuration + + +# Read metrics about swap memory usage +[[inputs.swap]] + # no configuration + + +# Read metrics about system load & uptime +[[inputs.system]] + ## Uncomment to remove deprecated metrics. + # fielddrop = ["uptime_format"] + + +# # Gather ActiveMQ metrics +# [[inputs.activemq]] +# ## ActiveMQ WebConsole URL +# url = "http://127.0.0.1:8161" +# +# ## Required ActiveMQ Endpoint +# ## deprecated in 1.11; use the url option +# # server = "127.0.0.1" +# # port = 8161 +# +# ## Credentials for basic HTTP authentication +# # username = "admin" +# # password = "admin" +# +# ## Required ActiveMQ webadmin root path +# # webadmin = "admin" +# +# ## Maximum time to receive response. +# # response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read stats from aerospike server(s) +# [[inputs.aerospike]] +# ## Aerospike servers to connect to (with port) +# ## This plugin will query all namespaces the aerospike +# ## server has configured and get stats for them. +# servers = ["localhost:3000"] +# +# # username = "telegraf" +# # password = "pa$$word" +# +# ## Optional TLS Config +# # enable_tls = false +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## If false, skip chain & host verification +# # insecure_skip_verify = true + + +# # Read Apache status information (mod_status) +# [[inputs.apache]] +# ## An array of URLs to gather from, must be directed at the machine +# ## readable version of the mod_status page including the auto query string. +# ## Default is "http://localhost/server-status?auto". +# urls = ["http://localhost/server-status?auto"] +# +# ## Credentials for basic HTTP authentication. +# # username = "myuser" +# # password = "mypassword" +# +# ## Maximum time to receive response. +# # response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Monitor APC UPSes connected to apcupsd +# [[inputs.apcupsd]] +# # A list of running apcupsd server to connect to. +# # If not provided will default to tcp://127.0.0.1:3551 +# servers = ["tcp://127.0.0.1:3551"] +# +# ## Timeout for dialing server. +# timeout = "5s" + + +# # Gather metrics from Apache Aurora schedulers +# [[inputs.aurora]] +# ## Schedulers are the base addresses of your Aurora Schedulers +# schedulers = ["http://127.0.0.1:8081"] +# +# ## Set of role types to collect metrics from. +# ## +# ## The scheduler roles are checked each interval by contacting the +# ## scheduler nodes; zookeeper is not contacted. +# # roles = ["leader", "follower"] +# +# ## Timeout is the max time for total network operations. +# # timeout = "5s" +# +# ## Username and password are sent using HTTP Basic Auth. 
+# # username = "username" +# # password = "pa$$word" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Gather Azure Storage Queue metrics +# [[inputs.azure_storage_queue]] +# ## Required Azure Storage Account name +# account_name = "mystorageaccount" +# +# ## Required Azure Storage Account access key +# account_key = "storageaccountaccesskey" +# +# ## Set to false to disable peeking age of oldest message (executes faster) +# # peek_oldest_message_age = true + + +# # Read metrics of bcache from stats_total and dirty_data +# [[inputs.bcache]] +# ## Bcache sets path +# ## If not specified, then default is: +# bcachePath = "/sys/fs/bcache" +# +# ## By default, telegraf gather stats for all bcache devices +# ## Setting devices will restrict the stats to the specified +# ## bcache devices. +# bcacheDevs = ["bcache0"] + + +# # Collects Beanstalkd server and tubes stats +# [[inputs.beanstalkd]] +# ## Server to collect data from +# server = "localhost:11300" +# +# ## List of tubes to gather stats about. +# ## If no tubes specified then data gathered for each tube on server reported by list-tubes command +# tubes = ["notifications"] + + +# # Read BIND nameserver XML statistics +# [[inputs.bind]] +# ## An array of BIND XML statistics URI to gather stats. +# ## Default is "http://localhost:8053/xml/v3". +# # urls = ["http://localhost:8053/xml/v3"] +# # gather_memory_contexts = false +# # gather_views = false + + +# # Collect bond interface status, slaves statuses and failures count +# [[inputs.bond]] +# ## Sets 'proc' directory path +# ## If not specified, then default is /proc +# # host_proc = "/proc" +# +# ## By default, telegraf gather stats for all bond interfaces +# ## Setting interfaces will restrict the stats to the specified +# ## bond interfaces. +# # bond_interfaces = ["bond0"] + + +# # Collect Kafka topics and consumers status from Burrow HTTP API. +# [[inputs.burrow]] +# ## Burrow API endpoints in format "schema://host:port". +# ## Default is "http://localhost:8000". +# servers = ["http://localhost:8000"] +# +# ## Override Burrow API prefix. +# ## Useful when Burrow is behind reverse-proxy. +# # api_prefix = "/v3/kafka" +# +# ## Maximum time to receive response. +# # response_timeout = "5s" +# +# ## Limit per-server concurrent connections. +# ## Useful in case of large number of topics or consumer groups. +# # concurrent_connections = 20 +# +# ## Filter clusters, default is no filtering. +# ## Values can be specified as glob patterns. +# # clusters_include = [] +# # clusters_exclude = [] +# +# ## Filter consumer groups, default is no filtering. +# ## Values can be specified as glob patterns. +# # groups_include = [] +# # groups_exclude = [] +# +# ## Filter topics, default is no filtering. +# ## Values can be specified as glob patterns. +# # topics_include = [] +# # topics_exclude = [] +# +# ## Credentials for basic HTTP authentication. +# # username = "" +# # password = "" +# +# ## Optional SSL config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# # insecure_skip_verify = false + + +# # Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. +# [[inputs.ceph]] +# ## This is the recommended interval to poll. 
Too frequent and you will lose +# ## data points due to timeouts during rebalancing and recovery +# interval = '1m' +# +# ## All configuration values are optional, defaults are shown below +# +# ## location of ceph binary +# ceph_binary = "/usr/bin/ceph" +# +# ## directory in which to look for socket files +# socket_dir = "/var/run/ceph" +# +# ## prefix of MON and OSD socket files, used to determine socket type +# mon_prefix = "ceph-mon" +# osd_prefix = "ceph-osd" +# +# ## suffix used to identify socket files +# socket_suffix = "asok" +# +# ## Ceph user to authenticate as +# ceph_user = "client.admin" +# +# ## Ceph configuration to use to locate the cluster +# ceph_config = "/etc/ceph/ceph.conf" +# +# ## Whether to gather statistics via the admin socket +# gather_admin_socket_stats = true +# +# ## Whether to gather statistics via ceph commands +# gather_cluster_stats = false + + +# # Read specific statistics per cgroup +# [[inputs.cgroup]] +# ## Directories in which to look for files, globs are supported. +# ## Consider restricting paths to the set of cgroups you really +# ## want to monitor if you have a large number of cgroups, to avoid +# ## any cardinality issues. +# # paths = [ +# # "/cgroup/memory", +# # "/cgroup/memory/child1", +# # "/cgroup/memory/child2/*", +# # ] +# ## cgroup stat fields, as file names, globs are supported. +# ## these file names are appended to each path from above. +# # files = ["memory.*usage*", "memory.limit_in_bytes"] + + +# # Get standard chrony metrics, requires chronyc executable. +# [[inputs.chrony]] +# ## If true, chronyc tries to perform a DNS lookup for the time server. +# # dns_lookup = false + + +# # Pull Metric Statistics from Amazon CloudWatch +# [[inputs.cloudwatch]] +# ## Amazon Region +# region = "us-east-1" +# +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile +# # access_key = "" +# # secret_key = "" +# # token = "" +# # role_arn = "" +# # profile = "" +# # shared_credential_file = "" +# +# ## Endpoint to make request against, the correct endpoint is automatically +# ## determined and this option should only be set if you wish to override the +# ## default. +# ## ex: endpoint_url = "http://localhost:8000" +# # endpoint_url = "" +# +# # The minimum period for Cloudwatch metrics is 1 minute (60s). However not all +# # metrics are made available to the 1 minute period. Some are collected at +# # 3 minute, 5 minute, or larger intervals. See https://aws.amazon.com/cloudwatch/faqs/#monitoring. +# # Note that if a period is configured that is smaller than the minimum for a +# # particular metric, that metric will not be returned by the Cloudwatch API +# # and will not be collected by Telegraf. +# # +# ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) +# period = "5m" +# +# ## Collection Delay (required - must account for metrics availability via CloudWatch API) +# delay = "5m" +# +# ## Recommended: use metric 'interval' that is a multiple of 'period' to avoid +# ## gaps or overlap in pulled data +# interval = "5m" +# +# ## Configure the TTL for the internal cache of metrics. +# # cache_ttl = "1h" +# +# ## Metric Statistic Namespace (required) +# namespace = "AWS/ELB" +# +# ## Maximum requests per second. 
Note that the global default AWS rate limit is +# ## 50 reqs/sec, so if you define multiple namespaces, these should add up to a +# ## maximum of 50. +# ## See http://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_limits.html +# # ratelimit = 25 +# +# ## Timeout for http requests made by the cloudwatch client. +# # timeout = "5s" +# +# ## Namespace-wide statistic filters. These allow fewer queries to be made to +# ## cloudwatch. +# # statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ] +# # statistic_exclude = [] +# +# ## Metrics to Pull +# ## Defaults to all Metrics in Namespace if nothing is provided +# ## Refreshes Namespace available metrics every 1h +# #[[inputs.cloudwatch.metrics]] +# # names = ["Latency", "RequestCount"] +# # +# # ## Statistic filters for Metric. These allow for retrieving specific +# # ## statistics for an individual metric. +# # # statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ] +# # # statistic_exclude = [] +# # +# # ## Dimension filters for Metric. All dimensions defined for the metric names +# # ## must be specified in order to retrieve the metric statistics. +# # [[inputs.cloudwatch.metrics.dimensions]] +# # name = "LoadBalancerName" +# # value = "p-example" + + +# # Collects conntrack stats from the configured directories and files. +# [[inputs.conntrack]] +# ## The following defaults would work with multiple versions of conntrack. +# ## Note the nf_ and ip_ filename prefixes are mutually exclusive across +# ## kernel versions, as are the directory locations. +# +# ## Superset of filenames to look for within the conntrack dirs. +# ## Missing files will be ignored. +# files = ["ip_conntrack_count","ip_conntrack_max", +# "nf_conntrack_count","nf_conntrack_max"] +# +# ## Directories to search within for the conntrack files above. +# ## Missing directrories will be ignored. +# dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] + + +# # Gather health check statuses from services registered in Consul +# [[inputs.consul]] +# ## Consul server address +# # address = "localhost:8500" +# +# ## URI scheme for the Consul server, one of "http", "https" +# # scheme = "http" +# +# ## ACL token used in every request +# # token = "" +# +# ## HTTP Basic Authentication username and password. +# # username = "" +# # password = "" +# +# ## Data center to query the health checks from +# # datacenter = "" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = true +# +# ## Consul checks' tag splitting +# # When tags are formatted like "key:value" with ":" as a delimiter then +# # they will be splitted and reported as proper key:value in Telegraf +# # tag_delimiter = ":" + + +# # Read metrics from one or many couchbase clusters +# [[inputs.couchbase]] +# ## specify servers via a url matching: +# ## [protocol://][:password]@address[:port] +# ## e.g. +# ## http://couchbase-0.example.com/ +# ## http://admin:secret@couchbase-0.example.com:8091/ +# ## +# ## If no servers are specified, then localhost is used as the host. +# ## If no protocol is specified, HTTP is used. +# ## If no port is specified, 8091 is used. 
+# servers = ["http://localhost:8091"] + + +# # Read CouchDB Stats from one or more servers +# [[inputs.couchdb]] +# ## Works with CouchDB stats endpoints out of the box +# ## Multiple Hosts from which to read CouchDB stats: +# hosts = ["http://localhost:8086/_stats"] +# +# ## Use HTTP Basic Authentication. +# # basic_username = "telegraf" +# # basic_password = "p@ssw0rd" + + +# # Input plugin for DC/OS metrics +# [[inputs.dcos]] +# ## The DC/OS cluster URL. +# cluster_url = "https://dcos-ee-master-1" +# +# ## The ID of the service account. +# service_account_id = "telegraf" +# ## The private key file for the service account. +# service_account_private_key = "/etc/telegraf/telegraf-sa-key.pem" +# +# ## Path containing login token. If set, will read on every gather. +# # token_file = "/home/dcos/.dcos/token" +# +# ## In all filter options if both include and exclude are empty all items +# ## will be collected. Arrays may contain glob patterns. +# ## +# ## Node IDs to collect metrics from. If a node is excluded, no metrics will +# ## be collected for its containers or apps. +# # node_include = [] +# # node_exclude = [] +# ## Container IDs to collect container metrics from. +# # container_include = [] +# # container_exclude = [] +# ## Container IDs to collect app metrics from. +# # app_include = [] +# # app_exclude = [] +# +# ## Maximum concurrent connections to the cluster. +# # max_connections = 10 +# ## Maximum time to receive a response from cluster. +# # response_timeout = "20s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## If false, skip chain & host verification +# # insecure_skip_verify = true +# +# ## Recommended filtering to reduce series cardinality. +# # [inputs.dcos.tagdrop] +# # path = ["/var/lib/mesos/slave/slaves/*"] + + +# # Read metrics from one or many disque servers +# [[inputs.disque]] +# ## An array of URI to gather stats about. Specify an ip or hostname +# ## with optional port and password. +# ## ie disque://localhost, disque://10.10.3.33:18832, 10.0.0.1:10000, etc. +# ## If no servers are specified, then localhost is used as the host. +# servers = ["localhost"] + + +# # Provide a native collection for dmsetup based statistics for dm-cache +# [[inputs.dmcache]] +# ## Whether to report per-device stats or not +# per_device = true + + +# # Query given DNS server and gives statistics +# [[inputs.dns_query]] +# ## servers to query +# servers = ["8.8.8.8"] +# +# ## Network is the network protocol name. +# # network = "udp" +# +# ## Domains or subdomains to query. +# # domains = ["."] +# +# ## Query record type. +# ## Posible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. +# # record_type = "A" +# +# ## Dns server port. +# # port = 53 +# +# ## Query timeout in seconds. +# # timeout = 2 + + +# # Read metrics about docker containers +# [[inputs.docker]] +# ## Docker Endpoint +# ## To use TCP, set endpoint = "tcp://[ip]:[port]" +# ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# endpoint = "unix:///var/run/docker.sock" +# +# ## Set to true to collect Swarm metrics(desired_replicas, running_replicas) +# gather_services = false +# +# ## Only collect metrics for these containers, collect all if empty +# container_names = [] +# +# ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars +# source_tag = false +# +# ## Containers to include and exclude. Globs accepted. 
+# ## Note that an empty array for both will include all containers +# container_name_include = [] +# container_name_exclude = [] +# +# ## Container states to include and exclude. Globs accepted. +# ## When empty only containers in the "running" state will be captured. +# ## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] +# ## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] +# # container_state_include = [] +# # container_state_exclude = [] +# +# ## Timeout for docker list, info, and stats commands +# timeout = "5s" +# +# ## Whether to report for each container per-device blkio (8:0, 8:1...) and +# ## network (eth0, eth1, ...) stats or not +# perdevice = true +# +# ## Whether to report for each container total blkio and network stats or not +# total = false +# +# ## Which environment variables should we use as a tag +# ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] +# +# ## docker labels to include and exclude as tags. Globs accepted. +# ## Note that an empty array for both will include all labels as tags +# docker_label_include = [] +# docker_label_exclude = [] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read statistics from one or many dovecot servers +# [[inputs.dovecot]] +# ## specify dovecot servers via an address:port list +# ## e.g. +# ## localhost:24242 +# ## +# ## If no servers are specified, then localhost is used as the host. +# servers = ["localhost:24242"] +# +# ## Type is one of "user", "domain", "ip", or "global" +# type = "global" +# +# ## Wildcard matches like "*.com". An empty string "" is same as "*" +# ## If type = "ip" filters should be +# filters = [""] + + +# # Read metrics about docker containers from Fargate/ECS v2 meta endpoints. +# [[inputs.ecs]] +# ## ECS metadata url +# # endpoint_url = "http://169.254.170.2" +# +# ## Containers to include and exclude. Globs accepted. +# ## Note that an empty array for both will include all containers +# # container_name_include = [] +# # container_name_exclude = [] +# +# ## Container states to include and exclude. Globs accepted. +# ## When empty only containers in the "RUNNING" state will be captured. +# ## Possible values are "NONE", "PULLED", "CREATED", "RUNNING", +# ## "RESOURCES_PROVISIONED", "STOPPED". +# # container_status_include = [] +# # container_status_exclude = [] +# +# ## ecs labels to include and exclude as tags. Globs accepted. +# ## Note that an empty array for both will include all labels as tags +# ecs_label_include = [ "com.amazonaws.ecs.*" ] +# ecs_label_exclude = [] +# +# ## Timeout for queries. +# # timeout = "5s" + + +# # Read stats from one or more Elasticsearch servers or clusters +# [[inputs.elasticsearch]] +# ## specify a list of one or more Elasticsearch servers +# # you can add username and password to your url to use basic authentication: +# # servers = ["http://user:pass@localhost:9200"] +# servers = ["http://localhost:9200"] +# +# ## Timeout for HTTP requests to the elastic search server(s) +# http_timeout = "5s" +# +# ## When local is true (the default), the node will read only its own stats. +# ## Set local to false when you want to read the node stats from all nodes +# ## of the cluster. 
+# local = true +# +# ## Set cluster_health to true when you want to also obtain cluster health stats +# cluster_health = false +# +# ## Adjust cluster_health_level when you want to also obtain detailed health stats +# ## The options are +# ## - indices (default) +# ## - cluster +# # cluster_health_level = "indices" +# +# ## Set cluster_stats to true when you want to also obtain cluster stats. +# cluster_stats = false +# +# ## Only gather cluster_stats from the master node. To work this require local = true +# cluster_stats_only_from_master = true +# +# ## Indices to collect; can be one or more indices names or _all +# indices_include = ["_all"] +# +# ## One of "shards", "cluster", "indices" +# indices_level = "shards" +# +# ## node_stats is a list of sub-stats that you want to have gathered. Valid options +# ## are "indices", "os", "process", "jvm", "thread_pool", "fs", "transport", "http", +# ## "breaker". Per default, all stats are gathered. +# # node_stats = ["jvm", "http"] +# +# ## HTTP Basic Authentication username and password. +# # username = "" +# # password = "" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Returns ethtool statistics for given interfaces +# [[inputs.ethtool]] +# ## List of interfaces to pull metrics for +# # interface_include = ["eth0"] +# +# ## List of interfaces to ignore when pulling metrics. +# # interface_exclude = ["eth1"] + + +# # Read metrics from one or more commands that can output to stdout +# [[inputs.exec]] +# ## Commands array +# commands = [ +# "/tmp/test.sh", +# "/usr/bin/mycollector --foo=bar", +# "/tmp/collect_*.sh" +# ] +# +# ## Timeout for each command to complete. +# timeout = "5s" +# +# ## measurement name suffix (for separating different commands) +# name_suffix = "_mycollector" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from fail2ban. +# [[inputs.fail2ban]] +# ## Use sudo to run fail2ban-client +# use_sudo = false + + +# # Read devices value(s) from a Fibaro controller +# [[inputs.fibaro]] +# ## Required Fibaro controller address/hostname. +# ## Note: at the time of writing this plugin, Fibaro only implemented http - no https available +# url = "http://:80" +# +# ## Required credentials to access the API (http://) +# username = "" +# password = "" +# +# ## Amount of time allowed to complete the HTTP request +# # timeout = "5s" + + +# # Reload and gather from file[s] on telegraf's interval. +# [[inputs.file]] +# ## Files to parse each interval. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". 
ie: +# ## /var/log/**.log -> recursively find all .log files in /var/log +# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +# ## /var/log/apache.log -> only read the apache log file +# files = ["/var/log/apache/access.log"] +# +# ## The dataformat to be read from files +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# +# ## Name a tag containing the name of the file the data was parsed from. Leave empty +# ## to disable. +# # file_tag = "" + + +# # Count files in a directory +# [[inputs.filecount]] +# ## Directory to gather stats about. +# ## deprecated in 1.9; use the directories option +# # directory = "/var/cache/apt/archives" +# +# ## Directories to gather stats about. +# ## This accept standard unit glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## /var/log/** -> recursively find all directories in /var/log and count files in each directories +# ## /var/log/*/* -> find all directories with a parent dir in /var/log and count files in each directories +# ## /var/log -> count all files in /var/log and all of its subdirectories +# directories = ["/var/cache/apt/archives"] +# +# ## Only count files that match the name pattern. Defaults to "*". +# name = "*.deb" +# +# ## Count files in subdirectories. Defaults to true. +# recursive = false +# +# ## Only count regular files. Defaults to true. +# regular_only = true +# +# ## Follow all symlinks while walking the directory tree. Defaults to false. +# follow_symlinks = false +# +# ## Only count files that are at least this size. If size is +# ## a negative number, only count files that are smaller than the +# ## absolute value of size. Acceptable units are B, KiB, MiB, KB, ... +# ## Without quotes and units, interpreted as size in bytes. +# size = "0B" +# +# ## Only count files that have not been touched for at least this +# ## duration. If mtime is negative, only count files that have been +# ## touched in this duration. Defaults to "0s". +# mtime = "0s" + + +# # Read stats about given file(s) +# [[inputs.filestat]] +# ## Files to gather stats about. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## "/var/log/**.log" -> recursively find all .log files in /var/log +# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log +# ## "/var/log/apache.log" -> just tail the apache log file +# ## +# ## See https://github.com/gobwas/glob for more examples +# ## +# files = ["/var/log/**.log"] +# +# ## If true, read the entire file and calculate an md5 checksum. +# md5 = false + + +# # Read real time temps from fireboard.io servers +# [[inputs.fireboard]] +# ## Specify auth token for your account +# auth_token = "invalidAuthToken" +# ## You can override the fireboard server URL if necessary +# # url = https://fireboard.io/api/v1/devices.json +# ## You can set a different http_timeout if you need to +# ## You should set a string using an number and time indicator +# ## for example "12s" for 12 seconds. +# # http_timeout = "4s" + + +# # Read metrics exposed by fluentd in_monitor plugin +# [[inputs.fluentd]] +# ## This plugin reads information exposed by fluentd (using /api/plugins.json endpoint). 
+# ## +# ## Endpoint: +# ## - only one URI is allowed +# ## - https is not supported +# endpoint = "http://localhost:24220/api/plugins.json" +# +# ## Define which plugins have to be excluded (based on "type" field - e.g. monitor_agent) +# exclude = [ +# "monitor_agent", +# "dummy", +# ] + + +# # Gather repository information from GitHub hosted repositories. +# [[inputs.github]] +# ## List of repositories to monitor. +# repositories = [ +# "influxdata/telegraf", +# "influxdata/influxdb" +# ] +# +# ## Github API access token. Unauthenticated requests are limited to 60 per hour. +# # access_token = "" +# +# ## Github API enterprise url. Github Enterprise accounts must specify their base url. +# # enterprise_base_url = "" +# +# ## Timeout for HTTP requests. +# # http_timeout = "5s" + + +# # Read flattened metrics from one or more GrayLog HTTP endpoints +# [[inputs.graylog]] +# ## API endpoint, currently supported API: +# ## +# ## - multiple (Ex http://:12900/system/metrics/multiple) +# ## - namespace (Ex http://:12900/system/metrics/namespace/{namespace}) +# ## +# ## For namespace endpoint, the metrics array will be ignored for that call. +# ## Endpoint can contain namespace and multiple type calls. +# ## +# ## Please check http://[graylog-server-ip]:12900/api-browser for full list +# ## of endpoints +# servers = [ +# "http://[graylog-server-ip]:12900/system/metrics/multiple", +# ] +# +# ## Metrics list +# ## List of metrics can be found on Graylog webservice documentation. +# ## Or by hitting the the web service api at: +# ## http://[graylog-host]:12900/system/metrics +# metrics = [ +# "jvm.cl.loaded", +# "jvm.memory.pools.Metaspace.committed" +# ] +# +# ## Username and password +# username = "" +# password = "" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics of haproxy, via socket or csv stats page +# [[inputs.haproxy]] +# ## An array of address to gather stats about. Specify an ip on hostname +# ## with optional port. ie localhost, 10.10.3.33:1936, etc. +# ## Make sure you specify the complete path to the stats endpoint +# ## including the protocol, ie http://10.10.3.33:1936/haproxy?stats +# +# ## If no servers are specified, then default to 127.0.0.1:1936/haproxy?stats +# servers = ["http://myhaproxy.com:1936/haproxy?stats"] +# +# ## Credentials for basic HTTP authentication +# # username = "admin" +# # password = "admin" +# +# ## You can also use local socket with standard wildcard globbing. +# ## Server address not starting with 'http' will be treated as a possible +# ## socket, so both examples below are valid. +# # servers = ["socket:/run/haproxy/admin.sock", "/run/haproxy/*.sock"] +# +# ## By default, some of the fields are renamed from what haproxy calls them. +# ## Setting this option to true results in the plugin keeping the original +# ## field names. +# # keep_field_names = false +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Monitor disks' temperatures using hddtemp +# [[inputs.hddtemp]] +# ## By default, telegraf gathers temps data from all disks detected by the +# ## hddtemp. +# ## +# ## Only collect temps from the selected disks. 
+# ## +# ## A * as the device name will return the temperature values of all disks. +# ## +# # address = "127.0.0.1:7634" +# # devices = ["sda", "*"] + + +# # Read formatted metrics from one or more HTTP endpoints +# [[inputs.http]] +# ## One or more URLs from which to read formatted metrics +# urls = [ +# "http://localhost/metrics" +# ] +# +# ## HTTP method +# # method = "GET" +# +# ## Optional HTTP headers +# # headers = {"X-Special-Header" = "Special-Value"} +# +# ## Optional HTTP Basic Auth Credentials +# # username = "username" +# # password = "pa$$word" +# +# ## HTTP entity-body to send with POST/PUT requests. +# # body = "" +# +# ## HTTP Content-Encoding for write request body, can be set to "gzip" to +# ## compress body or "identity" to apply no encoding. +# # content_encoding = "identity" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Amount of time allowed to complete the HTTP request +# # timeout = "5s" +# +# ## List of success status codes +# # success_status_codes = [200] +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# # data_format = "influx" + + +# # HTTP/HTTPS request given an address a method and a timeout +# [[inputs.http_response]] +# ## Deprecated in 1.12, use 'urls' +# ## Server address (default http://localhost) +# # address = "http://localhost" +# +# ## List of urls to query. +# # urls = ["http://localhost"] +# +# ## Set http_proxy (telegraf uses the system wide proxy settings if it's is not set) +# # http_proxy = "http://localhost:8888" +# +# ## Set response_timeout (default 5 seconds) +# # response_timeout = "5s" +# +# ## HTTP Request Method +# # method = "GET" +# +# ## Whether to follow redirects from the server (defaults to false) +# # follow_redirects = false +# +# ## Optional HTTP Request Body +# # body = ''' +# # {'fake':'data'} +# # ''' +# +# ## Optional substring or regex match in body of the response +# # response_string_match = "\"service_status\": \"up\"" +# # response_string_match = "ok" +# # response_string_match = "\".*_status\".?:.?\"up\"" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## HTTP Request Headers (all values must be strings) +# # [inputs.http_response.headers] +# # Host = "github.com" +# +# ## Interface to use when dialing an address +# # interface = "eth0" + + +# # Read flattened metrics from one or more JSON HTTP endpoints +# [[inputs.httpjson]] +# ## NOTE This plugin only reads numerical measurements, strings and booleans +# ## will be ignored. +# +# ## Name for the service being polled. Will be appended to the name of the +# ## measurement e.g. httpjson_webserver_stats +# ## +# ## Deprecated (1.3.0): Use name_override, name_suffix, name_prefix instead. 
+# name = "webserver_stats" +# +# ## URL of each server in the service's cluster +# servers = [ +# "http://localhost:9999/stats/", +# "http://localhost:9998/stats/", +# ] +# ## Set response_timeout (default 5 seconds) +# response_timeout = "5s" +# +# ## HTTP method to use: GET or POST (case-sensitive) +# method = "GET" +# +# ## List of tag names to extract from top-level of JSON server response +# # tag_keys = [ +# # "my_tag_1", +# # "my_tag_2" +# # ] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## HTTP parameters (all values must be strings). For "GET" requests, data +# ## will be included in the query. For "POST" requests, data will be included +# ## in the request body as "x-www-form-urlencoded". +# # [inputs.httpjson.parameters] +# # event_type = "cpu_spike" +# # threshold = "0.75" +# +# ## HTTP Headers (all values must be strings) +# # [inputs.httpjson.headers] +# # X-Auth-Token = "my-xauth-token" +# # apiVersion = "v1" + + +# # Gather Icinga2 status +# [[inputs.icinga2]] +# ## Required Icinga2 server address +# # server = "https://localhost:5665" +# +# ## Required Icinga2 object type ("services" or "hosts") +# # object_type = "services" +# +# ## Credentials for basic HTTP authentication +# # username = "admin" +# # password = "admin" +# +# ## Maximum time to receive response. +# # response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = true + + +# # Gets counters from all InfiniBand cards and ports installed +# [[inputs.infiniband]] +# # no configuration + + +# # Read InfluxDB-formatted JSON metrics from one or more HTTP endpoints +# [[inputs.influxdb]] +# ## Works with InfluxDB debug endpoints out of the box, +# ## but other services can use this format too. +# ## See the influxdb plugin's README for more details. +# +# ## Multiple URLs from which to read InfluxDB-formatted JSON +# ## Default is "http://localhost:8086/debug/vars". +# urls = [ +# "http://localhost:8086/debug/vars" +# ] +# +# ## Username and password to send using HTTP Basic Authentication. +# # username = "" +# # password = "" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## http request & header timeout +# timeout = "5s" + + +# # Collect statistics about itself +# [[inputs.internal]] +# ## If true, collect telegraf memory stats. +# # collect_memstats = true + + +# # This plugin gathers interrupts data from /proc/interrupts and /proc/softirqs. +# [[inputs.interrupts]] +# ## When set to true, cpu metrics are tagged with the cpu. Otherwise cpu is +# ## stored as a field. +# ## +# ## The default is false for backwards compatibility, and will be changed to +# ## true in a future version. It is recommended to set to true on new +# ## deployments. +# # cpu_as_tag = false +# +# ## To filter which IRQs to collect, make use of tagpass / tagdrop, i.e. 
+# # [inputs.interrupts.tagdrop] +# # irq = [ "NET_RX", "TASKLET" ] + + +# # Read metrics from the bare metal servers via IPMI +# [[inputs.ipmi_sensor]] +# ## optionally specify the path to the ipmitool executable +# # path = "/usr/bin/ipmitool" +# ## +# ## Setting 'use_sudo' to true will make use of sudo to run ipmitool. +# ## Sudo must be configured to allow the telegraf user to run ipmitool +# ## without a password. +# # use_sudo = false +# ## +# ## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR +# # privilege = "ADMINISTRATOR" +# ## +# ## optionally specify one or more servers via a url matching +# ## [username[:password]@][protocol[(address)]] +# ## e.g. +# ## root:passwd@lan(127.0.0.1) +# ## +# ## if no servers are specified, local machine sensor stats will be queried +# ## +# # servers = ["USERID:PASSW0RD@lan(192.168.1.1)"] +# +# ## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid +# ## gaps or overlap in pulled data +# interval = "30s" +# +# ## Timeout for the ipmitool command to complete +# timeout = "20s" +# +# ## Schema Version: (Optional, defaults to version 1) +# metric_version = 2 + + +# # Gather packets and bytes counters from Linux ipsets +# [[inputs.ipset]] +# ## By default, we only show sets which have already matched at least 1 packet. +# ## set include_unmatched_sets = true to gather them all. +# include_unmatched_sets = false +# ## Adjust your sudo settings appropriately if using this option ("sudo ipset save") +# use_sudo = false +# ## The default timeout of 1s for ipset execution can be overridden here: +# # timeout = "1s" + + +# # Gather packets and bytes throughput from iptables +# [[inputs.iptables]] +# ## iptables require root access on most systems. +# ## Setting 'use_sudo' to true will make use of sudo to run iptables. +# ## Users must configure sudo to allow telegraf user to run iptables with no password. +# ## iptables can be restricted to only list command "iptables -nvL". +# use_sudo = false +# ## Setting 'use_lock' to true runs iptables with the "-w" option. +# ## Adjust your sudo settings appropriately if using this option ("iptables -w 5 -nvl") +# use_lock = false +# ## Define an alternate executable, such as "ip6tables". Default is "iptables". +# # binary = "ip6tables" +# ## defines the table to monitor: +# table = "filter" +# ## defines the chains to monitor. +# ## NOTE: iptables rules without a comment will not be monitored. +# ## Read the plugin documentation for more information. 
+# chains = [ "INPUT" ] + + +# # Collect virtual and real server stats from Linux IPVS +# [[inputs.ipvs]] +# # no configuration + + +# # Read jobs and cluster metrics from Jenkins instances +# [[inputs.jenkins]] +# ## The Jenkins URL in the format "schema://host:port" +# url = "http://my-jenkins-instance:8080" +# # username = "admin" +# # password = "admin" +# +# ## Set response_timeout +# response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Optional Max Job Build Age filter +# ## Default 1 hour, ignore builds older than max_build_age +# # max_build_age = "1h" +# +# ## Optional Sub Job Depth filter +# ## Jenkins can have unlimited layer of sub jobs +# ## This config will limit the layers of pulling, default value 0 means +# ## unlimited pulling until no more sub jobs +# # max_subjob_depth = 0 +# +# ## Optional Sub Job Per Layer +# ## In workflow-multibranch-plugin, each branch will be created as a sub job. +# ## This config will limit to call only the lasted branches in each layer, +# ## empty will use default value 10 +# # max_subjob_per_layer = 10 +# +# ## Jobs to exclude from gathering +# # job_exclude = [ "job1", "job2/subjob1/subjob2", "job3/*"] +# +# ## Nodes to exclude from gathering +# # node_exclude = [ "node1", "node2" ] +# +# ## Worker pool for jenkins plugin only +# ## Empty this field will use default value 5 +# # max_connections = 5 + + +# # Read JMX metrics through Jolokia +# [[inputs.jolokia]] +# # DEPRECATED: the jolokia plugin has been deprecated in favor of the +# # jolokia2 plugin +# # see https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia2 +# +# ## This is the context root used to compose the jolokia url +# ## NOTE that Jolokia requires a trailing slash at the end of the context root +# ## NOTE that your jolokia security policy must allow for POST requests. +# context = "/jolokia/" +# +# ## This specifies the mode used +# # mode = "proxy" +# # +# ## When in proxy mode this section is used to specify further +# ## proxy address configurations. +# ## Remember to change host address to fit your environment. +# # [inputs.jolokia.proxy] +# # host = "127.0.0.1" +# # port = "8080" +# +# ## Optional http timeouts +# ## +# ## response_header_timeout, if non-zero, specifies the amount of time to wait +# ## for a server's response headers after fully writing the request. +# # response_header_timeout = "3s" +# ## +# ## client_timeout specifies a time limit for requests made by this client. +# ## Includes connection time, any redirects, and reading the response body. +# # client_timeout = "4s" +# +# ## Attribute delimiter +# ## +# ## When multiple attributes are returned for a single +# ## [inputs.jolokia.metrics], the field name is a concatenation of the metric +# ## name, and the attribute name, separated by the given delimiter. +# # delimiter = "_" +# +# ## List of servers exposing jolokia read service +# [[inputs.jolokia.servers]] +# name = "as-server-01" +# host = "127.0.0.1" +# port = "8080" +# # username = "myuser" +# # password = "mypassword" +# +# ## List of metrics collected on above servers +# ## Each metric consists in a name, a jmx path and either +# ## a pass or drop slice attribute. +# ## This collect all heap memory usage metrics. 
+# [[inputs.jolokia.metrics]] +# name = "heap_memory_usage" +# mbean = "java.lang:type=Memory" +# attribute = "HeapMemoryUsage" +# +# ## This collect thread counts metrics. +# [[inputs.jolokia.metrics]] +# name = "thread_count" +# mbean = "java.lang:type=Threading" +# attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" +# +# ## This collect number of class loaded/unloaded counts metrics. +# [[inputs.jolokia.metrics]] +# name = "class_count" +# mbean = "java.lang:type=ClassLoading" +# attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" + + +# # Read JMX metrics from a Jolokia REST agent endpoint +# [[inputs.jolokia2_agent]] +# # default_tag_prefix = "" +# # default_field_prefix = "" +# # default_field_separator = "." +# +# # Add agents URLs to query +# urls = ["http://localhost:8080/jolokia"] +# # username = "" +# # password = "" +# # response_timeout = "5s" +# +# ## Optional TLS config +# # tls_ca = "/var/private/ca.pem" +# # tls_cert = "/var/private/client.pem" +# # tls_key = "/var/private/client-key.pem" +# # insecure_skip_verify = false +# +# ## Add metrics to read +# [[inputs.jolokia2_agent.metric]] +# name = "java_runtime" +# mbean = "java.lang:type=Runtime" +# paths = ["Uptime"] + + +# # Read JMX metrics from a Jolokia REST proxy endpoint +# [[inputs.jolokia2_proxy]] +# # default_tag_prefix = "" +# # default_field_prefix = "" +# # default_field_separator = "." +# +# ## Proxy agent +# url = "http://localhost:8080/jolokia" +# # username = "" +# # password = "" +# # response_timeout = "5s" +# +# ## Optional TLS config +# # tls_ca = "/var/private/ca.pem" +# # tls_cert = "/var/private/client.pem" +# # tls_key = "/var/private/client-key.pem" +# # insecure_skip_verify = false +# +# ## Add proxy targets to query +# # default_target_username = "" +# # default_target_password = "" +# [[inputs.jolokia2_proxy.target]] +# url = "service:jmx:rmi:///jndi/rmi://targethost:9999/jmxrmi" +# # username = "" +# # password = "" +# +# ## Add metrics to read +# [[inputs.jolokia2_proxy.metric]] +# name = "java_runtime" +# mbean = "java.lang:type=Runtime" +# paths = ["Uptime"] + + +# # Read Kapacitor-formatted JSON metrics from one or more HTTP endpoints +# [[inputs.kapacitor]] +# ## Multiple URLs from which to read Kapacitor-formatted JSON +# ## Default is "http://localhost:9092/kapacitor/v1/debug/vars". +# urls = [ +# "http://localhost:9092/kapacitor/v1/debug/vars" +# ] +# +# ## Time limit for http requests +# timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Get kernel statistics from /proc/vmstat +# [[inputs.kernel_vmstat]] +# # no configuration + + +# # Read status information from one or more Kibana servers +# [[inputs.kibana]] +# ## Specify a list of one or more Kibana servers +# servers = ["http://localhost:5601"] +# +# ## Timeout for HTTP requests +# timeout = "5s" +# +# ## HTTP Basic Auth credentials +# # username = "username" +# # password = "pa$$word" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics from the Kubernetes api +# [[inputs.kube_inventory]] +# ## URL for the Kubernetes API +# url = "https://127.0.0.1" +# +# ## Namespace to use. 
Set to "" to use all namespaces. +# # namespace = "default" +# +# ## Use bearer token for authorization. ('bearer_token' takes priority) +# ## If both of these are empty, we'll use the default serviceaccount: +# ## at: /run/secrets/kubernetes.io/serviceaccount/token +# # bearer_token = "/path/to/bearer/token" +# ## OR +# # bearer_token_string = "abc_123" +# +# ## Set response_timeout (default 5 seconds) +# # response_timeout = "5s" +# +# ## Optional Resources to exclude from gathering +# ## Leave them with blank with try to gather everything available. +# ## Values can be - "daemonsets", deployments", "endpoints", "ingress", "nodes", +# ## "persistentvolumes", "persistentvolumeclaims", "pods", "services", "statefulsets" +# # resource_exclude = [ "deployments", "nodes", "statefulsets" ] +# +# ## Optional Resources to include when gathering +# ## Overrides resource_exclude if both set. +# # resource_include = [ "deployments", "nodes", "statefulsets" ] +# +# ## Optional TLS Config +# # tls_ca = "/path/to/cafile" +# # tls_cert = "/path/to/certfile" +# # tls_key = "/path/to/keyfile" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics from the kubernetes kubelet api +# [[inputs.kubernetes]] +# ## URL for the kubelet +# url = "http://127.0.0.1:10255" +# +# ## Use bearer token for authorization. ('bearer_token' takes priority) +# ## If both of these are empty, we'll use the default serviceaccount: +# ## at: /run/secrets/kubernetes.io/serviceaccount/token +# # bearer_token = "/path/to/bearer/token" +# ## OR +# # bearer_token_string = "abc_123" +# +# ## Pod labels to be added as tags. An empty array for both include and +# ## exclude will include all labels. +# # label_include = [] +# # label_exclude = ["*"] +# +# ## Set response_timeout (default 5 seconds) +# # response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = /path/to/cafile +# # tls_cert = /path/to/certfile +# # tls_key = /path/to/keyfile +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics from a LeoFS Server via SNMP +# [[inputs.leofs]] +# ## An array of URLs of the form: +# ## host [ ":" port] +# servers = ["127.0.0.1:4020"] + + +# # Provides Linux sysctl fs metrics +# [[inputs.linux_sysctl_fs]] +# # no configuration + + +# # Read metrics exposed by Logstash +# [[inputs.logstash]] +# ## The URL of the exposed Logstash API endpoint. +# url = "http://127.0.0.1:9600" +# +# ## Use Logstash 5 single pipeline API, set to true when monitoring +# ## Logstash 5. +# # single_pipeline = false +# +# ## Enable optional collection components. Can contain +# ## "pipelines", "process", and "jvm". +# # collect = ["pipelines", "process", "jvm"] +# +# ## Timeout for HTTP requests. +# # timeout = "5s" +# +# ## Optional HTTP Basic Auth credentials. +# # username = "username" +# # password = "pa$$word" +# +# ## Optional TLS Config. +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Use TLS but skip chain & host verification. +# # insecure_skip_verify = false +# +# ## Optional HTTP headers. 
+# # [inputs.logstash.headers] +# # "X-Special-Header" = "Special-Value" + + +# # Read metrics from local Lustre service on OST, MDS +# [[inputs.lustre2]] +# ## An array of /proc globs to search for Lustre stats +# ## If not specified, the default will work on Lustre 2.5.x +# ## +# # ost_procfiles = [ +# # "/proc/fs/lustre/obdfilter/*/stats", +# # "/proc/fs/lustre/osd-ldiskfs/*/stats", +# # "/proc/fs/lustre/obdfilter/*/job_stats", +# # ] +# # mds_procfiles = [ +# # "/proc/fs/lustre/mdt/*/md_stats", +# # "/proc/fs/lustre/mdt/*/job_stats", +# # ] + + +# # Gathers metrics from the /3.0/reports MailChimp API +# [[inputs.mailchimp]] +# ## MailChimp API key +# ## get from https://admin.mailchimp.com/account/api/ +# api_key = "" # required +# ## Reports for campaigns sent more than days_old ago will not be collected. +# ## 0 means collect all. +# days_old = 0 +# ## Campaign ID to get, if empty gets all campaigns, this option overrides days_old +# # campaign_id = "" + + +# # Retrives information on a specific host in a MarkLogic Cluster +# [[inputs.marklogic]] +# ## Base URL of the MarkLogic HTTP Server. +# url = "http://localhost:8002" +# +# ## List of specific hostnames to retrieve information. At least (1) required. +# # hosts = ["hostname1", "hostname2"] +# +# ## Using HTTP Basic Authentication. Management API requires 'manage-user' role privileges +# # username = "myuser" +# # password = "mypassword" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics from one or many mcrouter servers +# [[inputs.mcrouter]] +# ## An array of address to gather stats about. Specify an ip or hostname +# ## with port. ie tcp://localhost:11211, tcp://10.0.0.1:11211, etc. +# servers = ["tcp://localhost:11211", "unix:///var/run/mcrouter.sock"] +# +# ## Timeout for metric collections from all servers. Minimum timeout is "1s". +# # timeout = "5s" + + +# # Read metrics from one or many memcached servers +# [[inputs.memcached]] +# ## An array of address to gather stats about. Specify an ip on hostname +# ## with optional port. ie localhost, 10.0.0.1:11211, etc. +# servers = ["localhost:11211"] +# # unix_sockets = ["/var/run/memcached.sock"] + + +# # Telegraf plugin for gathering metrics from N Mesos masters +# [[inputs.mesos]] +# ## Timeout, in ms. +# timeout = 100 +# +# ## A list of Mesos masters. +# masters = ["http://localhost:5050"] +# +# ## Master metrics groups to be collected, by default, all enabled. +# master_collections = [ +# "resources", +# "master", +# "system", +# "agents", +# "frameworks", +# "framework_offers", +# "tasks", +# "messages", +# "evqueue", +# "registrar", +# "allocator", +# ] +# +# ## A list of Mesos slaves, default is [] +# # slaves = [] +# +# ## Slave metrics groups to be collected, by default, all enabled. +# # slave_collections = [ +# # "resources", +# # "agent", +# # "system", +# # "executors", +# # "tasks", +# # "messages", +# # ] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Collects scores from a Minecraft server's scoreboard using the RCON protocol +# [[inputs.minecraft]] +# ## Address of the Minecraft server. +# # server = "localhost" +# +# ## Server RCON Port. 
+# # port = "25575" +# +# ## Server RCON Password. +# password = "" +# +# ## Uncomment to remove deprecated metric components. +# # tagdrop = ["server"] + + +# # Retrieve data from MODBUS slave devices +# [[inputs.modbus]] +# ## Connection Configuration +# ## +# ## The plugin supports connections to PLCs via MODBUS/TCP or +# ## via serial line communication in binary (RTU) or readable (ASCII) encoding +# ## +# ## Device name +# name = "Device" +# +# ## Slave ID - addresses a MODBUS device on the bus +# ## Range: 0 - 255 [0 = broadcast; 248 - 255 = reserved] +# slave_id = 1 +# +# ## Timeout for each request +# timeout = "1s" +# +# # TCP - connect via Modbus/TCP +# controller = "tcp://localhost:502" +# +# # Serial (RS485; RS232) +# #controller = "file:///dev/ttyUSB0" +# #baud_rate = 9600 +# #data_bits = 8 +# #parity = "N" +# #stop_bits = 1 +# #transmission_mode = "RTU" +# +# +# ## Measurements +# ## +# +# ## Digital Variables, Discrete Inputs and Coils +# ## name - the variable name +# ## address - variable address +# +# discrete_inputs = [ +# { name = "start", address = [0]}, +# { name = "stop", address = [1]}, +# { name = "reset", address = [2]}, +# { name = "emergency_stop", address = [3]}, +# ] +# coils = [ +# { name = "motor1_run", address = [0]}, +# { name = "motor1_jog", address = [1]}, +# { name = "motor1_stop", address = [2]}, +# ] +# +# ## Analog Variables, Input Registers and Holding Registers +# ## name - the variable name +# ## byte_order - the ordering of bytes +# ## |---AB, ABCD - Big Endian +# ## |---BA, DCBA - Little Endian +# ## |---BADC - Mid-Big Endian +# ## |---CDAB - Mid-Little Endian +# ## data_type - UINT16, INT16, INT32, UINT32, FLOAT32, FLOAT32-IEEE (the IEEE 754 binary representation) +# ## scale - the final numeric variable representation +# ## address - variable address +# +# holding_registers = [ +# { name = "power_factor", byte_order = "AB", data_type = "FLOAT32", scale=0.01, address = [8]}, +# { name = "voltage", byte_order = "AB", data_type = "FLOAT32", scale=0.1, address = [0]}, +# { name = "energy", byte_order = "ABCD", data_type = "FLOAT32", scale=0.001, address = [5,6]}, +# { name = "current", byte_order = "ABCD", data_type = "FLOAT32", scale=0.001, address = [1,2]}, +# { name = "frequency", byte_order = "AB", data_type = "FLOAT32", scale=0.1, address = [7]}, +# { name = "power", byte_order = "ABCD", data_type = "FLOAT32", scale=0.1, address = [3,4]}, +# ] +# input_registers = [ +# { name = "tank_level", byte_order = "AB", data_type = "INT16", scale=1.0, address = [0]}, +# { name = "tank_ph", byte_order = "AB", data_type = "INT16", scale=1.0, address = [1]}, +# { name = "pump1_speed", byte_order = "ABCD", data_type = "INT32", scale=1.0, address = [3,4]}, +# ] + + +# # Read metrics from one or many MongoDB servers +# [[inputs.mongodb]] +# ## An array of URLs of the form: +# ## "mongodb://" [user ":" pass "@"] host [ ":" port] +# ## For example: +# ## mongodb://user:auth_key@10.10.3.30:27017, +# ## mongodb://10.10.3.33:18832, +# servers = ["mongodb://127.0.0.1:27017"] +# +# ## When true, collect per database stats +# # gather_perdb_stats = false +# +# ## When true, collect per collection stats +# # gather_col_stats = false +# +# ## List of db where collections stats are collected +# ## If empty, all db are concerned +# # col_stats_dbs = ["local"] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # 
insecure_skip_verify = false + + +# # Read metrics and status information about processes managed by Monit +# [[inputs.monit]] +# ## Monit HTTPD address +# address = "http://127.0.0.1:2812" +# +# ## Username and Password for Monit +# # username = "" +# # password = "" +# +# ## Amount of time allowed to complete the HTTP request +# # timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Aggregates the contents of multiple files into a single point +# [[inputs.multifile]] +# ## Base directory where telegraf will look for files. +# ## Omit this option to use absolute paths. +# base_dir = "/sys/bus/i2c/devices/1-0076/iio:device0" +# +# ## If true, Telegraf discard all data when a single file can't be read. +# ## Else, Telegraf omits the field generated from this file. +# # fail_early = true +# +# ## Files to parse each interval. +# [[inputs.multifile.file]] +# file = "in_pressure_input" +# dest = "pressure" +# conversion = "float" +# [[inputs.multifile.file]] +# file = "in_temp_input" +# dest = "temperature" +# conversion = "float(3)" +# [[inputs.multifile.file]] +# file = "in_humidityrelative_input" +# dest = "humidityrelative" +# conversion = "float(3)" + + +# # Read metrics from one or many mysql servers +# [[inputs.mysql]] +# ## specify servers via a url matching: +# ## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify|custom]] +# ## see https://github.com/go-sql-driver/mysql#dsn-data-source-name +# ## e.g. +# ## servers = ["user:passwd@tcp(127.0.0.1:3306)/?tls=false"] +# ## servers = ["user@tcp(127.0.0.1:3306)/?tls=false"] +# # +# ## If no servers are specified, then localhost is used as the host. +# servers = ["tcp(127.0.0.1:3306)/"] +# +# ## Selects the metric output format. +# ## +# ## This option exists to maintain backwards compatibility, if you have +# ## existing metrics do not set or change this value until you are ready to +# ## migrate to the new format. +# ## +# ## If you do not have existing metrics from this plugin set to the latest +# ## version. 
+# ## +# ## Telegraf >=1.6: metric_version = 2 +# ## <1.6: metric_version = 1 (or unset) +# metric_version = 2 +# +# ## if the list is empty, then metrics are gathered from all databasee tables +# # table_schema_databases = [] +# +# ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list +# # gather_table_schema = false +# +# ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST +# # gather_process_list = false +# +# ## gather user statistics from INFORMATION_SCHEMA.USER_STATISTICS +# # gather_user_statistics = false +# +# ## gather auto_increment columns and max values from information schema +# # gather_info_schema_auto_inc = false +# +# ## gather metrics from INFORMATION_SCHEMA.INNODB_METRICS +# # gather_innodb_metrics = false +# +# ## gather metrics from SHOW SLAVE STATUS command output +# # gather_slave_status = false +# +# ## gather metrics from SHOW BINARY LOGS command output +# # gather_binary_logs = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.GLOBAL_VARIABLES +# # gather_global_variables = true +# +# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE +# # gather_table_io_waits = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_LOCK_WAITS +# # gather_table_lock_waits = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_INDEX_USAGE +# # gather_index_io_waits = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.EVENT_WAITS +# # gather_event_waits = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME +# # gather_file_events_stats = false +# +# ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST +# # gather_perf_events_statements = false +# +# ## the limits for metrics form perf_events_statements +# # perf_events_statements_digest_text_limit = 120 +# # perf_events_statements_limit = 250 +# # perf_events_statements_time_limit = 86400 +# +# ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) +# ## example: interval_slow = "30m" +# # interval_slow = "" +# +# ## Optional TLS Config (will be used if tls=custom parameter specified in server uri) +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Provides metrics about the state of a NATS server +# [[inputs.nats]] +# ## The address of the monitoring endpoint of the NATS server +# server = "http://localhost:8222" +# +# ## Maximum time to receive response +# # response_timeout = "5s" + + +# # Neptune Apex data collector +# [[inputs.neptune_apex]] +# ## The Neptune Apex plugin reads the publicly available status.xml data from a local Apex. +# ## Measurements will be logged under "apex". +# +# ## The base URL of the local Apex(es). If you specify more than one server, they will +# ## be differentiated by the "source" tag. +# servers = [ +# "http://apex.local", +# ] +# +# ## The response_timeout specifies how long to wait for a reply from the Apex. +# #response_timeout = "5s" + + +# # Read metrics about network interface usage +# [[inputs.net]] +# ## By default, telegraf gathers stats from any up interface (excluding loopback) +# ## Setting interfaces will tell it to gather these explicit interfaces, +# ## regardless of status. +# ## +# # interfaces = ["eth0"] +# ## +# ## On linux systems telegraf also collects protocol stats. 
+# ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. +# ## +# # ignore_protocol_stats = false +# ## + + +# # Collect response time of a TCP or UDP connection +# [[inputs.net_response]] +# ## Protocol, must be "tcp" or "udp" +# ## NOTE: because the "udp" protocol does not respond to requests, it requires +# ## a send/expect string pair (see below). +# protocol = "tcp" +# ## Server address (default localhost) +# address = "localhost:80" +# +# ## Set timeout +# # timeout = "1s" +# +# ## Set read timeout (only used if expecting a response) +# # read_timeout = "1s" +# +# ## The following options are required for UDP checks. For TCP, they are +# ## optional. The plugin will send the given string to the server and then +# ## expect to receive the given 'expect' string back. +# ## string sent to the server +# # send = "ssh" +# ## expected string in answer +# # expect = "ssh" +# +# ## Uncomment to remove deprecated fields +# # fielddrop = ["result_type", "string_found"] + + +# # Read TCP metrics such as established, time wait and sockets counts. +# [[inputs.netstat]] +# # no configuration + + +# # Read Nginx's basic status information (ngx_http_stub_status_module) +# [[inputs.nginx]] +# # An array of Nginx stub_status URI to gather stats. +# urls = ["http://localhost/server_status"] +# +# ## Optional TLS Config +# tls_ca = "/etc/telegraf/ca.pem" +# tls_cert = "/etc/telegraf/cert.cer" +# tls_key = "/etc/telegraf/key.key" +# ## Use TLS but skip chain & host verification +# insecure_skip_verify = false +# +# # HTTP response timeout (default: 5s) +# response_timeout = "5s" + + +# # Read Nginx Plus' full status information (ngx_http_status_module) +# [[inputs.nginx_plus]] +# ## An array of ngx_http_status_module or status URI to gather stats. +# urls = ["http://localhost/status"] +# +# # HTTP response timeout (default: 5s) +# response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read Nginx Plus Api documentation +# [[inputs.nginx_plus_api]] +# ## An array of API URI to gather stats. 
+# urls = ["http://localhost/api"] +# +# # Nginx API version, default: 3 +# # api_version = 3 +# +# # HTTP response timeout (default: 5s) +# response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read nginx_upstream_check module status information (https://github.com/yaoweibin/nginx_upstream_check_module) +# [[inputs.nginx_upstream_check]] +# ## An URL where Nginx Upstream check module is enabled +# ## It should be set to return a JSON formatted response +# url = "http://127.0.0.1/status?format=json" +# +# ## HTTP method +# # method = "GET" +# +# ## Optional HTTP headers +# # headers = {"X-Special-Header" = "Special-Value"} +# +# ## Override HTTP "Host" header +# # host_header = "check.example.com" +# +# ## Timeout for HTTP requests +# timeout = "5s" +# +# ## Optional HTTP Basic Auth credentials +# # username = "username" +# # password = "pa$$word" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read Nginx virtual host traffic status module information (nginx-module-vts) +# [[inputs.nginx_vts]] +# ## An array of ngx_http_status_module or status URI to gather stats. +# urls = ["http://localhost/status"] +# +# ## HTTP response timeout (default: 5s) +# response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read NSQ topic and channel statistics. +# [[inputs.nsq]] +# ## An array of NSQD HTTP API endpoints +# endpoints = ["http://localhost:4151"] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Collect kernel snmp counters and network interface statistics +# [[inputs.nstat]] +# ## file paths for proc files. If empty default paths will be used: +# ## /proc/net/netstat, /proc/net/snmp, /proc/net/snmp6 +# ## These can also be overridden with env variables, see README. +# proc_net_netstat = "/proc/net/netstat" +# proc_net_snmp = "/proc/net/snmp" +# proc_net_snmp6 = "/proc/net/snmp6" +# ## dump metrics with 0 values too +# dump_zeros = true + + +# # Get standard NTP query metrics, requires ntpq executable. +# [[inputs.ntpq]] +# ## If false, set the -n ntpq flag. Can reduce metric gather time. +# dns_lookup = true + + +# # Pulls statistics from nvidia GPUs attached to the host +# [[inputs.nvidia_smi]] +# ## Optional: path to nvidia-smi binary, defaults to $PATH via exec.LookPath +# # bin_path = "/usr/bin/nvidia-smi" +# +# ## Optional: timeout for GPU polling +# # timeout = "5s" + + +# # OpenLDAP cn=Monitor plugin +# [[inputs.openldap]] +# host = "localhost" +# port = 389 +# +# # ldaps, starttls, or no encryption. default is an empty string, disabling all encryption. +# # note that port will likely need to be changed to 636 for ldaps +# # valid options: "" | "starttls" | "ldaps" +# tls = "" +# +# # skip peer certificate verification. Default is false. 
+# insecure_skip_verify = false +# +# # Path to PEM-encoded Root certificate to use to verify server certificate +# tls_ca = "/etc/ssl/certs.pem" +# +# # dn/password to bind with. If bind_dn is empty, an anonymous bind is performed. +# bind_dn = "" +# bind_password = "" +# +# # Reverse metric names so they sort more naturally. Recommended. +# # This defaults to false if unset, but is set to true when generating a new config +# reverse_metric_names = true + + +# # Get standard NTP query metrics from OpenNTPD. +# [[inputs.openntpd]] +# ## Run ntpctl binary with sudo. +# # use_sudo = false +# +# ## Location of the ntpctl binary. +# # binary = "/usr/sbin/ntpctl" +# +# ## Maximum time the ntpctl binary is allowed to run. +# # timeout = "5ms" + + +# # A plugin to collect stats from Opensmtpd - a validating, recursive, and caching DNS resolver +# [[inputs.opensmtpd]] +# ## If running as a restricted user you can prepend sudo for additional access: +# #use_sudo = false +# +# ## The default location of the smtpctl binary can be overridden with: +# binary = "/usr/sbin/smtpctl" +# +# ## The default timeout of 1000ms can be overriden with (in milliseconds): +# timeout = 1000 + + +# # Read current weather and forecasts data from openweathermap.org +# [[inputs.openweathermap]] +# ## OpenWeatherMap API key. +# app_id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +# +# ## City ID's to collect weather data from. +# city_id = ["5391959"] +# +# ## Language of the description field. Can be one of "ar", "bg", +# ## "ca", "cz", "de", "el", "en", "fa", "fi", "fr", "gl", "hr", "hu", +# ## "it", "ja", "kr", "la", "lt", "mk", "nl", "pl", "pt", "ro", "ru", +# ## "se", "sk", "sl", "es", "tr", "ua", "vi", "zh_cn", "zh_tw" +# # lang = "en" +# +# ## APIs to fetch; can contain "weather" or "forecast". +# fetch = ["weather", "forecast"] +# +# ## OpenWeatherMap base URL +# # base_url = "https://api.openweathermap.org/" +# +# ## Timeout for HTTP response. +# # response_timeout = "5s" +# +# ## Preferred unit system for temperature and wind speed. Can be one of +# ## "metric", "imperial", or "standard". +# # units = "metric" +# +# ## Query interval; OpenWeatherMap updates their weather data every 10 +# ## minutes. +# interval = "10m" + + +# # Read metrics of passenger using passenger-status +# [[inputs.passenger]] +# ## Path of passenger-status. +# ## +# ## Plugin gather metric via parsing XML output of passenger-status +# ## More information about the tool: +# ## https://www.phusionpassenger.com/library/admin/apache/overall_status_report.html +# ## +# ## If no path is specified, then the plugin simply execute passenger-status +# ## hopefully it can be found in your PATH +# command = "passenger-status -v --show=xml" + + +# # Gather counters from PF +# [[inputs.pf]] +# ## PF require root access on most systems. +# ## Setting 'use_sudo' to true will make use of sudo to run pfctl. +# ## Users must configure sudo to allow telegraf user to run pfctl with no password. +# ## pfctl can be restricted to only list command "pfctl -s info". +# use_sudo = false + + +# # Read metrics of phpfpm, via HTTP status page or socket +# [[inputs.phpfpm]] +# ## An array of addresses to gather stats about. 
Specify an ip or hostname +# ## with optional port and path +# ## +# ## Plugin can be configured in three modes (either can be used): +# ## - http: the URL must start with http:// or https://, ie: +# ## "http://localhost/status" +# ## "http://192.168.130.1/status?full" +# ## +# ## - unixsocket: path to fpm socket, ie: +# ## "/var/run/php5-fpm.sock" +# ## or using a custom fpm status path: +# ## "/var/run/php5-fpm.sock:fpm-custom-status-path" +# ## +# ## - fcgi: the URL must start with fcgi:// or cgi://, and port must be present, ie: +# ## "fcgi://10.0.0.12:9000/status" +# ## "cgi://10.0.10.12:9001/status" +# ## +# ## Example of multiple gathering from local socket and remote host +# ## urls = ["http://192.168.1.20/status", "/tmp/fpm.sock"] +# urls = ["http://localhost/status"] +# +# ## Duration allowed to complete HTTP requests. +# # timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Ping given url(s) and return statistics +# [[inputs.ping]] +# ## Hosts to send ping packets to. +# urls = ["example.org"] +# +# ## Method used for sending pings, can be either "exec" or "native". When set +# ## to "exec" the systems ping command will be executed. When set to "native" +# ## the plugin will send pings directly. +# ## +# ## While the default is "exec" for backwards compatibility, new deployments +# ## are encouraged to use the "native" method for improved compatibility and +# ## performance. +# # method = "exec" +# +# ## Number of ping packets to send per interval. Corresponds to the "-c" +# ## option of the ping command. +# # count = 1 +# +# ## Time to wait between sending ping packets in seconds. Operates like the +# ## "-i" option of the ping command. +# # ping_interval = 1.0 +# +# ## If set, the time to wait for a ping response in seconds. Operates like +# ## the "-W" option of the ping command. +# # timeout = 1.0 +# +# ## If set, the total ping deadline, in seconds. Operates like the -w option +# ## of the ping command. +# # deadline = 10 +# +# ## Interface or source address to send ping from. Operates like the -I or -S +# ## option of the ping command. +# # interface = "" +# +# ## Specify the ping executable binary. +# # binary = "ping" +# +# ## Arguments for ping command. When arguments is not empty, the command from +# ## the binary option will be used and other options (ping_interval, timeout, +# ## etc) will be ignored. +# # arguments = ["-c", "3"] +# +# ## Use only IPv6 addresses when resolving a hostname. +# # ipv6 = false + + +# # Measure postfix queue statistics +# [[inputs.postfix]] +# ## Postfix queue directory. If not provided, telegraf will try to use +# ## 'postconf -h queue_directory' to determine it. +# # queue_directory = "/var/spool/postfix" + + +# # Read metrics from one or many PowerDNS servers +# [[inputs.powerdns]] +# ## An array of sockets to gather stats about. +# ## Specify a path to unix socket. +# unix_sockets = ["/var/run/pdns.controlsocket"] + + +# # Read metrics from one or many PowerDNS Recursor servers +# [[inputs.powerdns_recursor]] +# ## Path to the Recursor control socket. +# unix_sockets = ["/var/run/pdns_recursor.controlsocket"] +# +# ## Directory to create receive socket. This default is likely not writable, +# ## please reference the full plugin documentation for a recommended setup. 
+# # socket_dir = "/var/run/"
+# ## Socket permissions for the receive socket.
+# # socket_mode = "0666"
+
+
+# # Monitor process cpu and memory usage
+# [[inputs.procstat]]
+# ## PID file to monitor process
+# pid_file = "/var/run/nginx.pid"
+# ## executable name (ie, pgrep <exe>)
+# # exe = "nginx"
+# ## pattern as argument for pgrep (ie, pgrep -f <pattern>)
+# # pattern = "nginx"
+# ## user as argument for pgrep (ie, pgrep -u <user>)
+# # user = "nginx"
+# ## Systemd unit name
+# # systemd_unit = "nginx.service"
+# ## CGroup name or path
+# # cgroup = "systemd/system.slice/nginx.service"
+#
+# ## Windows service name
+# # win_service = ""
+#
+# ## override for process_name
+# ## This is optional; default is sourced from /proc/<pid>/status
+# # process_name = "bar"
+#
+# ## Field name prefix
+# # prefix = ""
+#
+# ## When true add the full cmdline as a tag.
+# # cmdline_tag = false
+#
+# ## Add PID as a tag instead of a field; useful to differentiate between
+# ## processes whose tags are otherwise the same. Can create a large number
+# ## of series, use judiciously.
+# # pid_tag = false
+#
+# ## Method to use when finding process IDs. Can be one of 'pgrep', or
+# ## 'native'. The pgrep finder calls the pgrep executable in the PATH while
+# ## the native finder performs the search directly in a manner dependent on the
+# ## platform. Default is 'pgrep'
+# # pid_finder = "pgrep"
+[[inputs.procstat]]
+ pattern = "nginx"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "wrk2"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "redis"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "deepflow-agent"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "productpage"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "details"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "ws-javaagent.jar"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "ratings"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "envoy"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "kube-apiserver"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "authApp"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "postgres"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "mysqld"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+[[inputs.procstat]]
+ pattern = "go-server-sample"
+ pid_finder = "pgrep"
+ pid_tag = true
+
+# # Reads last_run_summary.yaml file and converts to measurements
+# [[inputs.puppetagent]]
+# ## Location of puppet last run summary file
+# location = "/var/lib/puppet/state/last_run_summary.yaml"
+
+
+# # Reads metrics from RabbitMQ servers via the Management Plugin
+# [[inputs.rabbitmq]]
+# ## Management Plugin url.
(default: http://localhost:15672) +# # url = "http://localhost:15672" +# ## Tag added to rabbitmq_overview series; deprecated: use tags +# # name = "rmq-server-1" +# ## Credentials +# # username = "guest" +# # password = "guest" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Optional request timeouts +# ## +# ## ResponseHeaderTimeout, if non-zero, specifies the amount of time to wait +# ## for a server's response headers after fully writing the request. +# # header_timeout = "3s" +# ## +# ## client_timeout specifies a time limit for requests made by this client. +# ## Includes connection time, any redirects, and reading the response body. +# # client_timeout = "4s" +# +# ## A list of nodes to gather as the rabbitmq_node measurement. If not +# ## specified, metrics for all nodes are gathered. +# # nodes = ["rabbit@node1", "rabbit@node2"] +# +# ## A list of queues to gather as the rabbitmq_queue measurement. If not +# ## specified, metrics for all queues are gathered. +# # queues = ["telegraf"] +# +# ## A list of exchanges to gather as the rabbitmq_exchange measurement. If not +# ## specified, metrics for all exchanges are gathered. +# # exchanges = ["telegraf"] +# +# ## Queues to include and exclude. Globs accepted. +# ## Note that an empty array for both will include all queues +# queue_name_include = [] +# queue_name_exclude = [] +# +# ## Federation upstreams include and exclude when gathering the rabbitmq_federation measurement. +# ## If neither are specified, metrics for all federation upstreams are gathered. +# ## Federation link metrics will only be gathered for queues and exchanges +# ## whose non-federation metrics will be collected (e.g a queue excluded +# ## by the 'queue_name_exclude' option will also be excluded from federation). +# ## Globs accepted. +# # federation_upstream_include = ["dataCentre-*"] +# # federation_upstream_exclude = [] + + +# # Read raindrops stats (raindrops - real-time stats for preforking Rack servers) +# [[inputs.raindrops]] +# ## An array of raindrops middleware URI to gather stats. +# urls = ["http://localhost:8080/_raindrops"] + + +# # Read metrics from one or many redis servers +# [[inputs.redis]] +# ## specify servers via a url matching: +# ## [protocol://][:password]@address[:port] +# ## e.g. +# ## tcp://localhost:6379 +# ## tcp://:password@192.168.99.100 +# ## unix:///var/run/redis.sock +# ## +# ## If no servers are specified, then localhost is used as the host. +# ## If no port is specified, 6379 is used +# servers = ["tcp://localhost:6379"] +# +# ## specify server password +# # password = "s#cr@t%" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = true + + +# # Read metrics from one or many RethinkDB servers +# [[inputs.rethinkdb]] +# ## An array of URI to gather stats about. Specify an ip or hostname +# ## with optional port add password. ie, +# ## rethinkdb://user:auth_key@10.10.3.30:28105, +# ## rethinkdb://10.10.3.33:18832, +# ## 10.0.0.1:10000, etc. +# servers = ["127.0.0.1:28015"] +# ## +# ## If you use actual rethinkdb of > 2.3.0 with username/password authorization, +# ## protocol have to be named "rethinkdb2" - it will use 1_0 H. 
+# # servers = ["rethinkdb2://username:password@127.0.0.1:28015"] +# ## +# ## If you use older versions of rethinkdb (<2.2) with auth_key, protocol +# ## have to be named "rethinkdb". +# # servers = ["rethinkdb://username:auth_key@127.0.0.1:28015"] + + +# # Read metrics one or many Riak servers +# [[inputs.riak]] +# # Specify a list of one or more riak http servers +# servers = ["http://localhost:8098"] + + +# # Read API usage and limits for a Salesforce organisation +# [[inputs.salesforce]] +# ## specify your credentials +# ## +# username = "your_username" +# password = "your_password" +# ## +# ## (optional) security token +# # security_token = "your_security_token" +# ## +# ## (optional) environment type (sandbox or production) +# ## default is: production +# ## +# # environment = "production" +# ## +# ## (optional) API version (default: "39.0") +# ## +# # version = "39.0" + + +# # Monitor sensors, requires lm-sensors package +# [[inputs.sensors]] +# ## Remove numbers from field names. +# ## If true, a field name like 'temp1_input' will be changed to 'temp_input'. +# # remove_numbers = true +# +# ## Timeout is the maximum amount of time that the sensors command can run. +# # timeout = "5s" + + +# # Read metrics from storage devices supporting S.M.A.R.T. +# [[inputs.smart]] +# ## Optionally specify the path to the smartctl executable +# # path = "/usr/bin/smartctl" +# +# ## On most platforms smartctl requires root access. +# ## Setting 'use_sudo' to true will make use of sudo to run smartctl. +# ## Sudo must be configured to to allow the telegraf user to run smartctl +# ## without a password. +# # use_sudo = false +# +# ## Skip checking disks in this power mode. Defaults to +# ## "standby" to not wake up disks that have stoped rotating. +# ## See --nocheck in the man pages for smartctl. +# ## smartctl version 5.41 and 5.42 have faulty detection of +# ## power mode and might require changing this value to +# ## "never" depending on your disks. +# # nocheck = "standby" +# +# ## Gather all returned S.M.A.R.T. attribute metrics and the detailed +# ## information from each drive into the 'smart_attribute' measurement. +# # attributes = false +# +# ## Optionally specify devices to exclude from reporting. +# # excludes = [ "/dev/pass6" ] +# +# ## Optionally specify devices and device type, if unset +# ## a scan (smartctl --scan) for S.M.A.R.T. devices will +# ## done and all found will be included except for the +# ## excluded in excludes. +# # devices = [ "/dev/ada0 -d atacam" ] +# +# ## Timeout for the smartctl command to complete. +# # timeout = "30s" + + +# # Retrieves SNMP values from remote agents +# [[inputs.snmp]] +# ## Agent addresses to retrieve values from. +# ## example: agents = ["udp://127.0.0.1:161"] +# ## agents = ["tcp://127.0.0.1:161"] +# agents = ["udp://127.0.0.1:161"] +# +# ## Timeout for each request. +# # timeout = "5s" +# +# ## SNMP version; can be 1, 2, or 3. +# # version = 2 +# +# ## SNMP community string. +# # community = "public" +# +# ## Number of retries to attempt. +# # retries = 3 +# +# ## The GETBULK max-repetitions parameter. +# # max_repetitions = 10 +# +# ## SNMPv3 authentication and encryption options. +# ## +# ## Security Name. +# # sec_name = "myuser" +# ## Authentication protocol; one of "MD5", "SHA", or "". +# # auth_protocol = "MD5" +# ## Authentication password. +# # auth_password = "pass" +# ## Security Level; one of "noAuthNoPriv", "authNoPriv", or "authPriv". +# # sec_level = "authNoPriv" +# ## Context Name. 
+# # context_name = "" +# ## Privacy protocol used for encrypted messages; one of "DES", "AES" or "". +# # priv_protocol = "" +# ## Privacy password used for encrypted messages. +# # priv_password = "" +# +# ## Add fields and tables defining the variables you wish to collect. This +# ## example collects the system uptime and interface variables. Reference the +# ## full plugin documentation for configuration details. + + +# # DEPRECATED! PLEASE USE inputs.snmp INSTEAD. +# [[inputs.snmp_legacy]] +# ## Use 'oids.txt' file to translate oids to names +# ## To generate 'oids.txt' you need to run: +# ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt +# ## Or if you have an other MIB folder with custom MIBs +# ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt +# snmptranslate_file = "/tmp/oids.txt" +# [[inputs.snmp.host]] +# address = "192.168.2.2:161" +# # SNMP community +# community = "public" # default public +# # SNMP version (1, 2 or 3) +# # Version 3 not supported yet +# version = 2 # default 2 +# # SNMP response timeout +# timeout = 2.0 # default 2.0 +# # SNMP request retries +# retries = 2 # default 2 +# # Which get/bulk do you want to collect for this host +# collect = ["mybulk", "sysservices", "sysdescr"] +# # Simple list of OIDs to get, in addition to "collect" +# get_oids = [] +# +# [[inputs.snmp.host]] +# address = "192.168.2.3:161" +# community = "public" +# version = 2 +# timeout = 2.0 +# retries = 2 +# collect = ["mybulk"] +# get_oids = [ +# "ifNumber", +# ".1.3.6.1.2.1.1.3.0", +# ] +# +# [[inputs.snmp.get]] +# name = "ifnumber" +# oid = "ifNumber" +# +# [[inputs.snmp.get]] +# name = "interface_speed" +# oid = "ifSpeed" +# instance = "0" +# +# [[inputs.snmp.get]] +# name = "sysuptime" +# oid = ".1.3.6.1.2.1.1.3.0" +# unit = "second" +# +# [[inputs.snmp.bulk]] +# name = "mybulk" +# max_repetition = 127 +# oid = ".1.3.6.1.2.1.1" +# +# [[inputs.snmp.bulk]] +# name = "ifoutoctets" +# max_repetition = 127 +# oid = "ifOutOctets" +# +# [[inputs.snmp.host]] +# address = "192.168.2.13:161" +# #address = "127.0.0.1:161" +# community = "public" +# version = 2 +# timeout = 2.0 +# retries = 2 +# #collect = ["mybulk", "sysservices", "sysdescr", "systype"] +# collect = ["sysuptime" ] +# [[inputs.snmp.host.table]] +# name = "iftable3" +# include_instances = ["enp5s0", "eth1"] +# +# # SNMP TABLEs +# # table without mapping neither subtables +# [[inputs.snmp.table]] +# name = "iftable1" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# +# # table without mapping but with subtables +# [[inputs.snmp.table]] +# name = "iftable2" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# sub_tables = [".1.3.6.1.2.1.2.2.1.13"] +# +# # table with mapping but without subtables +# [[inputs.snmp.table]] +# name = "iftable3" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# # if empty. 
get all instances +# mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" +# # if empty, get all subtables +# +# # table with both mapping and subtables +# [[inputs.snmp.table]] +# name = "iftable4" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# # if empty get all instances +# mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" +# # if empty get all subtables +# # sub_tables could be not "real subtables" +# sub_tables=[".1.3.6.1.2.1.2.2.1.13", "bytes_recv", "bytes_send"] + + +# # Read stats from one or more Solr servers or cores +# [[inputs.solr]] +# ## specify a list of one or more Solr servers +# servers = ["http://localhost:8983"] +# +# ## specify a list of one or more Solr cores (default - all) +# # cores = ["main"] +# +# ## Optional HTTP Basic Auth Credentials +# # username = "username" +# # password = "pa$$word" + + +# # Read metrics from Microsoft SQL Server +# [[inputs.sqlserver]] +# ## Specify instances to monitor with a list of connection strings. +# ## All connection parameters are optional. +# ## By default, the host is localhost, listening on default port, TCP 1433. +# ## for Windows, the user is the currently running AD user (SSO). +# ## See https://github.com/denisenkom/go-mssqldb for detailed connection +# ## parameters, in particular, tls connections can be created like so: +# ## "encrypt=true;certificate=;hostNameInCertificate=" +# # servers = [ +# # "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", +# # ] +# +# ## Optional parameter, setting this to 2 will use a new version +# ## of the collection queries that break compatibility with the original +# ## dashboards. +# query_version = 2 +# +# ## If you are using AzureDB, setting this to true will gather resource utilization metrics +# # azuredb = false +# +# ## Possible queries: +# ## - PerformanceCounters +# ## - WaitStatsCategorized +# ## - DatabaseIO +# ## - DatabaseProperties +# ## - CPUHistory +# ## - DatabaseSize +# ## - DatabaseStats +# ## - MemoryClerk +# ## - VolumeSpace +# ## - PerformanceMetrics +# ## - Schedulers +# ## - AzureDBResourceStats +# ## - AzureDBResourceGovernance +# ## - SqlRequests +# ## - ServerProperties +# ## A list of queries to include. If not specified, all the above listed queries are used. +# # include_query = [] +# +# ## A list of queries to explicitly ignore. +# exclude_query = [ 'Schedulers' , 'SqlRequests'] + + +# # Gather timeseries from Google Cloud Platform v3 monitoring API +# [[inputs.stackdriver]] +# ## GCP Project +# project = "erudite-bloom-151019" +# +# ## Include timeseries that start with the given metric type. +# metric_type_prefix_include = [ +# "compute.googleapis.com/", +# ] +# +# ## Exclude timeseries that start with the given metric type. +# # metric_type_prefix_exclude = [] +# +# ## Many metrics are updated once per minute; it is recommended to override +# ## the agent level interval with a value of 1m or greater. +# interval = "1m" +# +# ## Maximum number of API calls to make per second. The quota for accounts +# ## varies, it can be viewed on the API dashboard: +# ## https://cloud.google.com/monitoring/quotas#quotas_and_limits +# # rate_limit = 14 +# +# ## The delay and window options control the number of points selected on +# ## each gather. When set, metrics are gathered between: +# ## start: now() - delay - window +# ## end: now() - delay +# # +# ## Collection delay; if set too low metrics may not yet be available. 
+# # delay = "5m" +# # +# ## If unset, the window will start at 1m and be updated dynamically to span +# ## the time between calls (approximately the length of the plugin interval). +# # window = "1m" +# +# ## TTL for cached list of metric types. This is the maximum amount of time +# ## it may take to discover new metrics. +# # cache_ttl = "1h" +# +# ## If true, raw bucket counts are collected for distribution value types. +# ## For a more lightweight collection, you may wish to disable and use +# ## distribution_aggregation_aligners instead. +# # gather_raw_distribution_buckets = true +# +# ## Aggregate functions to be used for metrics whose value type is +# ## distribution. These aggregate values are recorded in in addition to raw +# ## bucket counts; if they are enabled. +# ## +# ## For a list of aligner strings see: +# ## https://cloud.google.com/monitoring/api/ref_v3/rpc/google.monitoring.v3#aligner +# # distribution_aggregation_aligners = [ +# # "ALIGN_PERCENTILE_99", +# # "ALIGN_PERCENTILE_95", +# # "ALIGN_PERCENTILE_50", +# # ] +# +# ## Filters can be added to reduce the number of time series matched. All +# ## functions are supported: starts_with, ends_with, has_substring, and +# ## one_of. Only the '=' operator is supported. +# ## +# ## The logical operators when combining filters are defined statically using +# ## the following values: +# ## filter ::= {AND } +# ## resource_labels ::= {OR } +# ## metric_labels ::= {OR } +# ## +# ## For more details, see https://cloud.google.com/monitoring/api/v3/filters +# # +# ## Resource labels refine the time series selection with the following expression: +# ## resource.labels. = +# # [[inputs.stackdriver.filter.resource_labels]] +# # key = "instance_name" +# # value = 'starts_with("localhost")' +# # +# ## Metric labels refine the time series selection with the following expression: +# ## metric.labels. = +# # [[inputs.stackdriver.filter.metric_labels]] +# # key = "device_name" +# # value = 'one_of("sda", "sdb")' + + +# # Get synproxy counter statistics from procfs +# [[inputs.synproxy]] +# # no configuration + + +# # Sysstat metrics collector +# [[inputs.sysstat]] +# ## Path to the sadc command. +# # +# ## Common Defaults: +# ## Debian/Ubuntu: /usr/lib/sysstat/sadc +# ## Arch: /usr/lib/sa/sadc +# ## RHEL/CentOS: /usr/lib64/sa/sadc +# sadc_path = "/usr/lib/sa/sadc" # required +# +# ## Path to the sadf command, if it is not in PATH +# # sadf_path = "/usr/bin/sadf" +# +# ## Activities is a list of activities, that are passed as argument to the +# ## sadc collector utility (e.g: DISK, SNMP etc...) +# ## The more activities that are added, the more data is collected. +# # activities = ["DISK"] +# +# ## Group metrics to measurements. +# ## +# ## If group is false each metric will be prefixed with a description +# ## and represents itself a measurement. +# ## +# ## If Group is true, corresponding metrics are grouped to a single measurement. +# # group = true +# +# ## Options for the sadf command. The values on the left represent the sadf +# ## options and the values on the right their description (which are used for +# ## grouping and prefixing metrics). +# ## +# ## Run 'sar -h' or 'man sar' to find out the supported options for your +# ## sysstat version. 
+# [inputs.sysstat.options] +# -C = "cpu" +# -B = "paging" +# -b = "io" +# -d = "disk" # requires DISK activity +# "-n ALL" = "network" +# "-P ALL" = "per_cpu" +# -q = "queue" +# -R = "mem" +# -r = "mem_util" +# -S = "swap_util" +# -u = "cpu_util" +# -v = "inode" +# -W = "swap" +# -w = "task" +# # -H = "hugepages" # only available for newer linux distributions +# # "-I ALL" = "interrupts" # requires INT activity +# +# ## Device tags can be used to add additional tags for devices. +# ## For example the configuration below adds a tag vg with value rootvg for +# ## all metrics with sda devices. +# # [[inputs.sysstat.device_tags.sda]] +# # vg = "rootvg" + + +# # Gather systemd units state +# [[inputs.systemd_units]] +# ## Set timeout for systemctl execution +# # timeout = "1s" +# # +# ## Filter for a specific unit type, default is "service", other possible +# ## values are "socket", "target", "device", "mount", "automount", "swap", +# ## "timer", "path", "slice" and "scope ": +# # unittype = "service" + + +# # Reads metrics from a Teamspeak 3 Server via ServerQuery +# [[inputs.teamspeak]] +# ## Server address for Teamspeak 3 ServerQuery +# # server = "127.0.0.1:10011" +# ## Username for ServerQuery +# username = "serverqueryuser" +# ## Password for ServerQuery +# password = "secret" +# ## Array of virtual servers +# # virtual_servers = [1] + + +# # Read metrics about temperature +# [[inputs.temp]] +# # no configuration + + +# # Read Tengine's basic status information (ngx_http_reqstat_module) +# [[inputs.tengine]] +# # An array of Tengine reqstat module URI to gather stats. +# urls = ["http://127.0.0.1/us"] +# +# # HTTP response timeout (default: 5s) +# # response_timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.cer" +# # tls_key = "/etc/telegraf/key.key" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Gather metrics from the Tomcat server status page. 
+# [[inputs.tomcat]] +# ## URL of the Tomcat server status +# # url = "http://127.0.0.1:8080/manager/status/all?XML=true" +# +# ## HTTP Basic Auth Credentials +# # username = "tomcat" +# # password = "s3cret" +# +# ## Request timeout +# # timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Inserts sine and cosine waves for demonstration purposes +# [[inputs.trig]] +# ## Set the amplitude +# amplitude = 10.0 + + +# # Read Twemproxy stats data +# [[inputs.twemproxy]] +# ## Twemproxy stats address and port (no scheme) +# addr = "localhost:22222" +# ## Monitor pool name +# pools = ["redis_pool", "mc_pool"] + + +# # A plugin to collect stats from the Unbound DNS resolver +# [[inputs.unbound]] +# ## Address of server to connect to, read from unbound conf default, optionally ':port' +# ## Will lookup IP if given a hostname +# server = "127.0.0.1:8953" +# +# ## If running as a restricted user you can prepend sudo for additional access: +# # use_sudo = false +# +# ## The default location of the unbound-control binary can be overridden with: +# # binary = "/usr/sbin/unbound-control" +# +# ## The default location of the unbound config file can be overridden with: +# # config_file = "/etc/unbound/unbound.conf" +# +# ## The default timeout of 1s can be overriden with: +# # timeout = "1s" +# +# ## When set to true, thread metrics are tagged with the thread id. +# ## +# ## The default is false for backwards compatibility, and will be changed to +# ## true in a future version. It is recommended to set to true on new +# ## deployments. +# thread_as_tag = false + + +# # Read uWSGI metrics. +# [[inputs.uwsgi]] +# ## List with urls of uWSGI Stats servers. URL must match pattern: +# ## scheme://address[:port] +# ## +# ## For example: +# ## servers = ["tcp://localhost:5050", "http://localhost:1717", "unix:///tmp/statsock"] +# servers = ["tcp://127.0.0.1:1717"] +# +# ## General connection timout +# # timeout = "5s" + + +# # A plugin to collect stats from Varnish HTTP Cache +# [[inputs.varnish]] +# ## If running as a restricted user you can prepend sudo for additional access: +# #use_sudo = false +# +# ## The default location of the varnishstat binary can be overridden with: +# binary = "/usr/bin/varnishstat" +# +# ## By default, telegraf gather stats for 3 metric points. +# ## Setting stats will override the defaults shown below. +# ## Glob matching can be used, ie, stats = ["MAIN.*"] +# ## stats may also be set to ["*"], which will collect all stats +# stats = ["MAIN.cache_hit", "MAIN.cache_miss", "MAIN.uptime"] +# +# ## Optional name for the varnish instance (or working directory) to query +# ## Usually appened after -n in varnish cli +# # instance_name = instanceName +# +# ## Timeout for varnishstat command +# # timeout = "1s" + + +# # Collect Wireguard server interface and peer statistics +# [[inputs.wireguard]] +# ## Optional list of Wireguard device/interface names to query. +# ## If omitted, all Wireguard interfaces are queried. 
+# # devices = ["wg0"] + + +# # Monitor wifi signal strength and quality +# [[inputs.wireless]] +# ## Sets 'proc' directory path +# ## If not specified, then default is /proc +# # host_proc = "/proc" + + +# # Reads metrics from a SSL certificate +# [[inputs.x509_cert]] +# ## List certificate sources +# sources = ["/etc/ssl/certs/ssl-cert-snakeoil.pem", "tcp://example.org:443"] +# +# ## Timeout for SSL connection +# # timeout = "5s" +# +# ## Pass a different name into the TLS request (Server Name Indication) +# ## example: server_name = "myhost.example.org" +# # server_name = "" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" + + +# # Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools +# [[inputs.zfs]] +# ## ZFS kstat path. Ignored on FreeBSD +# ## If not specified, then default is: +# # kstatPath = "/proc/spl/kstat/zfs" +# +# ## By default, telegraf gather all zfs stats +# ## If not specified, then default is: +# # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] +# ## For Linux, the default is: +# # kstatMetrics = ["abdstats", "arcstats", "dnodestats", "dbufcachestats", +# # "dmu_tx", "fm", "vdev_mirror_stats", "zfetchstats", "zil"] +# ## By default, don't gather zpool stats +# # poolMetrics = false + + +# # Reads 'mntr' stats from one or many zookeeper servers +# [[inputs.zookeeper]] +# ## An array of address to gather stats about. Specify an ip or hostname +# ## with port. ie localhost:2181, 10.0.0.1:2181, etc. +# +# ## If no servers are specified, then localhost is used as the host. +# ## If no port is specified, 2181 is used +# servers = [":2181"] +# +# ## Timeout for metric collections from all servers. Minimum timeout is "1s". +# # timeout = "5s" +# +# ## Optional TLS Config +# # enable_tls = true +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## If false, skip chain & host verification +# # insecure_skip_verify = true + + +############################################################################### +# SERVICE INPUT PLUGINS # +############################################################################### + + +# # AMQP consumer plugin +# [[inputs.amqp_consumer]] +# ## Broker to consume from. +# ## deprecated in 1.7; use the brokers option +# # url = "amqp://localhost:5672/influxdb" +# +# ## Brokers to consume from. If multiple brokers are specified a random broker +# ## will be selected anytime a connection is established. This can be +# ## helpful for load balancing when not using a dedicated load balancer. +# brokers = ["amqp://localhost:5672/influxdb"] +# +# ## Authentication credentials for the PLAIN auth_method. +# # username = "" +# # password = "" +# +# ## Name of the exchange to declare. If unset, no exchange will be declared. +# exchange = "telegraf" +# +# ## Exchange type; common types are "direct", "fanout", "topic", "header", "x-consistent-hash". +# # exchange_type = "topic" +# +# ## If true, exchange will be passively declared. +# # exchange_passive = false +# +# ## Exchange durability can be either "transient" or "durable". +# # exchange_durability = "durable" +# +# ## Additional exchange arguments. +# # exchange_arguments = { } +# # exchange_arguments = {"hash_propery" = "timestamp"} +# +# ## AMQP queue name. +# queue = "telegraf" +# +# ## AMQP queue durability can be "transient" or "durable". 
+# queue_durability = "durable" +# +# ## If true, queue will be passively declared. +# # queue_passive = false +# +# ## A binding between the exchange and queue using this binding key is +# ## created. If unset, no binding is created. +# binding_key = "#" +# +# ## Maximum number of messages server should give to the worker. +# # prefetch_count = 50 +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Auth method. PLAIN and EXTERNAL are supported +# ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as +# ## described here: https://www.rabbitmq.com/plugins.html +# # auth_method = "PLAIN" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Content encoding for message payloads, can be set to "gzip" to or +# ## "identity" to apply no encoding. +# # content_encoding = "identity" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read Cassandra metrics through Jolokia +# [[inputs.cassandra]] +# ## DEPRECATED: The cassandra plugin has been deprecated. Please use the +# ## jolokia2 plugin instead. +# ## +# ## see https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia2 +# +# context = "/jolokia/read" +# ## List of cassandra servers exposing jolokia read service +# servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"] +# ## List of metrics collected on above servers +# ## Each metric consists of a jmx path. +# ## This will collect all heap memory usage metrics from the jvm and +# ## ReadLatency metrics for all keyspaces and tables. +# ## "type=Table" in the query works with Cassandra3.0. 
Older versions might +# ## need to use "type=ColumnFamily" +# metrics = [ +# "/java.lang:type=Memory/HeapMemoryUsage", +# "/org.apache.cassandra.metrics:type=Table,keyspace=*,scope=*,name=ReadLatency" +# ] + + +# # Cisco GNMI telemetry input plugin based on GNMI telemetry data produced in IOS XR +# [[inputs.cisco_telemetry_gnmi]] +# ## Address and port of the GNMI GRPC server +# addresses = ["10.49.234.114:57777"] +# +# ## define credentials +# username = "cisco" +# password = "cisco" +# +# ## GNMI encoding requested (one of: "proto", "json", "json_ietf") +# # encoding = "proto" +# +# ## redial in case of failures after +# redial = "10s" +# +# ## enable client-side TLS and define CA to authenticate the device +# # enable_tls = true +# # tls_ca = "/etc/telegraf/ca.pem" +# # insecure_skip_verify = true +# +# ## define client-side TLS certificate & key to authenticate to the device +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## GNMI subscription prefix (optional, can usually be left empty) +# ## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths +# # origin = "" +# # prefix = "" +# # target = "" +# +# ## Define additional aliases to map telemetry encoding paths to simple measurement names +# #[inputs.cisco_telemetry_gnmi.aliases] +# # ifcounters = "openconfig:/interfaces/interface/state/counters" +# +# [[inputs.cisco_telemetry_gnmi.subscription]] +# ## Name of the measurement that will be emitted +# name = "ifcounters" +# +# ## Origin and path of the subscription +# ## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths +# ## +# ## origin usually refers to a (YANG) data model implemented by the device +# ## and path to a specific substructe inside it that should be subscribed to (similar to an XPath) +# ## YANG models can be found e.g. here: https://github.com/YangModels/yang/tree/master/vendor/cisco/xr +# origin = "openconfig-interfaces" +# path = "/interfaces/interface/state/counters" +# +# # Subscription mode (one of: "target_defined", "sample", "on_change") and interval +# subscription_mode = "sample" +# sample_interval = "10s" +# +# ## Suppress redundant transmissions when measured values are unchanged +# # suppress_redundant = false +# +# ## If suppression is enabled, send updates at least every X seconds anyway +# # heartbeat_interval = "60s" + + +# # Cisco model-driven telemetry (MDT) input plugin for IOS XR, IOS XE and NX-OS platforms +# [[inputs.cisco_telemetry_mdt]] +# ## Telemetry transport can be "tcp" or "grpc". TLS is only supported when +# ## using the grpc transport. +# transport = "grpc" +# +# ## Address and port to host telemetry listener +# service_address = ":57000" +# +# ## Enable TLS; grpc transport only. +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Enable TLS client authentication and define allowed CA certificates; grpc +# ## transport only. 
+# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Define (for certain nested telemetry measurements with embedded tags) which fields are tags +# # embedded_tags = ["Cisco-IOS-XR-qos-ma-oper:qos/interface-table/interface/input/service-policy-names/service-policy-instance/statistics/class-stats/class-name"] +# +# ## Define aliases to map telemetry encoding paths to simple measurement names +# [inputs.cisco_telemetry_mdt.aliases] +# ifstats = "ietf-interfaces:interfaces-state/interface/statistics" + + +# # Read metrics from one or many ClickHouse servers +# [[inputs.clickhouse]] +# ## Username for authorization on ClickHouse server +# ## example: user = "default"" +# username = "default" +# +# ## Password for authorization on ClickHouse server +# ## example: password = "super_secret" +# +# ## HTTP(s) timeout while getting metrics values +# ## The timeout includes connection time, any redirects, and reading the response body. +# ## example: timeout = 1s +# # timeout = 5s +# +# ## List of servers for metrics scraping +# ## metrics scrape via HTTP(s) clickhouse interface +# ## https://clickhouse.tech/docs/en/interfaces/http/ +# ## example: servers = ["http://127.0.0.1:8123","https://custom-server.mdb.yandexcloud.net"] +# servers = ["http://127.0.0.1:8123"] +# +# ## If "auto_discovery"" is "true" plugin tries to connect to all servers available in the cluster +# ## with using same "user:password" described in "user" and "password" parameters +# ## and get this server hostname list from "system.clusters" table +# ## see +# ## - https://clickhouse.tech/docs/en/operations/system_tables/#system-clusters +# ## - https://clickhouse.tech/docs/en/operations/server_settings/settings/#server_settings_remote_servers +# ## - https://clickhouse.tech/docs/en/operations/table_engines/distributed/ +# ## - https://clickhouse.tech/docs/en/operations/table_engines/replication/#creating-replicated-tables +# ## example: auto_discovery = false +# # auto_discovery = true +# +# ## Filter cluster names in "system.clusters" when "auto_discovery" is "true" +# ## when this filter present then "WHERE cluster IN (...)" filter will apply +# ## please use only full cluster names here, regexp and glob filters is not allowed +# ## for "/etc/clickhouse-server/config.d/remote.xml" +# ## +# ## +# ## +# ## +# ## clickhouse-ru-1.local9000 +# ## clickhouse-ru-2.local9000 +# ## +# ## +# ## clickhouse-eu-1.local9000 +# ## clickhouse-eu-2.local9000 +# ## +# ## +# ## +# ## +# ## +# ## +# ## example: cluster_include = ["my-own-cluster"] +# # cluster_include = [] +# +# ## Filter cluster names in "system.clusters" when "auto_discovery" is "true" +# ## when this filter present then "WHERE cluster NOT IN (...)" filter will apply +# ## example: cluster_exclude = ["my-internal-not-discovered-cluster"] +# # cluster_exclude = [] +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read metrics from Google PubSub +# [[inputs.cloud_pubsub]] +# ## Required. Name of Google Cloud Platform (GCP) Project that owns +# ## the given PubSub subscription. +# project = "my-project" +# +# ## Required. Name of PubSub subscription to ingest metrics from. +# subscription = "my-subscription" +# +# ## Required. Data format to consume. +# ## Each data format has its own unique set of configuration options. 
+# ## Read more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# +# ## Optional. Filepath for GCP credentials JSON file to authorize calls to +# ## PubSub APIs. If not set explicitly, Telegraf will attempt to use +# ## Application Default Credentials, which is preferred. +# # credentials_file = "path/to/my/creds.json" +# +# ## Optional. Number of seconds to wait before attempting to restart the +# ## PubSub subscription receiver after an unexpected error. +# ## If the streaming pull for a PubSub Subscription fails (receiver), +# ## the agent attempts to restart receiving messages after this many seconds. +# # retry_delay_seconds = 5 +# +# ## Optional. Maximum byte length of a message to consume. +# ## Larger messages are dropped with an error. If less than 0 or unspecified, +# ## treated as no limit. +# # max_message_len = 1000000 +# +# ## Optional. Maximum messages to read from PubSub that have not been written +# ## to an output. Defaults to 1000. +# ## For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message contains 10 metrics and the output +# ## metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## The following are optional Subscription ReceiveSettings in PubSub. +# ## Read more about these values: +# ## https://godoc.org/cloud.google.com/go/pubsub#ReceiveSettings +# +# ## Optional. Maximum number of seconds for which a PubSub subscription +# ## should auto-extend the PubSub ACK deadline for each message. If less than +# ## 0, auto-extension is disabled. +# # max_extension = 0 +# +# ## Optional. Maximum number of unprocessed messages in PubSub +# ## (unacknowledged but not yet expired in PubSub). +# ## A value of 0 is treated as the default PubSub value. +# ## Negative values will be treated as unlimited. +# # max_outstanding_messages = 0 +# +# ## Optional. Maximum size in bytes of unprocessed messages in PubSub +# ## (unacknowledged but not yet expired in PubSub). +# ## A value of 0 is treated as the default PubSub value. +# ## Negative values will be treated as unlimited. +# # max_outstanding_bytes = 0 +# +# ## Optional. Max number of goroutines a PubSub Subscription receiver can spawn +# ## to pull messages from PubSub concurrently. This limit applies to each +# ## subscription separately and is treated as the PubSub default if less than +# ## 1. Note this setting does not limit the number of messages that can be +# ## processed concurrently (use "max_outstanding_messages" instead). +# # max_receiver_go_routines = 0 +# +# ## Optional. If true, Telegraf will attempt to base64 decode the +# ## PubSub message data before parsing +# # base64_data = false + + +# # Google Cloud Pub/Sub Push HTTP listener +# [[inputs.cloud_pubsub_push]] +# ## Address and port to host HTTP listener on +# service_address = ":8080" +# +# ## Application secret to verify messages originate from Cloud Pub/Sub +# # token = "" +# +# ## Path to listen to. +# # path = "/" +# +# ## Maximum duration before timing out read of the request +# # read_timeout = "10s" +# ## Maximum duration before timing out write of the response. 
This should be set to a value +# ## large enough that you can send at least 'metric_batch_size' number of messages within the +# ## duration. +# # write_timeout = "10s" +# +# ## Maximum allowed http request body size in bytes. +# ## 0 means to use the default of 524,288,00 bytes (500 mebibytes) +# # max_body_size = "500MB" +# +# ## Whether to add the pubsub metadata, such as message attributes and subscription as a tag. +# # add_meta = false +# +# ## Optional. Maximum messages to read from PubSub that have not been written +# ## to an output. Defaults to 1000. +# ## For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message contains 10 metrics and the output +# ## metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Add service certificate and key +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read logging output from the Docker engine +# [[inputs.docker_log]] +# ## Docker Endpoint +# ## To use TCP, set endpoint = "tcp://[ip]:[port]" +# ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# # endpoint = "unix:///var/run/docker.sock" +# +# ## When true, container logs are read from the beginning; otherwise +# ## reading begins at the end of the log. +# # from_beginning = false +# +# ## Timeout for Docker API calls. +# # timeout = "5s" +# +# ## Containers to include and exclude. Globs accepted. +# ## Note that an empty array for both will include all containers +# # container_name_include = [] +# # container_name_exclude = [] +# +# ## Container states to include and exclude. Globs accepted. +# ## When empty only containers in the "running" state will be captured. +# # container_state_include = [] +# # container_state_exclude = [] +# +# ## docker labels to include and exclude as tags. Globs accepted. +# ## Note that an empty array for both will include all labels as tags +# # docker_label_include = [] +# # docker_label_exclude = [] +# +# ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars +# source_tag = false +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # Azure Event Hubs service input plugin +# [[inputs.eventhub_consumer]] +# ## The default behavior is to create a new Event Hub client from environment variables. 
+# ## This requires one of the following sets of environment variables to be set: +# ## +# ## 1) Expected Environment Variables: +# ## - "EVENTHUB_NAMESPACE" +# ## - "EVENTHUB_NAME" +# ## - "EVENTHUB_CONNECTION_STRING" +# ## +# ## 2) Expected Environment Variables: +# ## - "EVENTHUB_NAMESPACE" +# ## - "EVENTHUB_NAME" +# ## - "EVENTHUB_KEY_NAME" +# ## - "EVENTHUB_KEY_VALUE" +# +# ## Uncommenting the option below will create an Event Hub client based solely on the connection string. +# ## This can either be the associated environment variable or hard coded directly. +# # connection_string = "" +# +# ## Set persistence directory to a valid folder to use a file persister instead of an in-memory persister +# # persistence_dir = "" +# +# ## Change the default consumer group +# # consumer_group = "" +# +# ## By default the event hub receives all messages present on the broker, alternative modes can be set below. +# ## The timestamp should be in https://github.com/toml-lang/toml#offset-date-time format (RFC 3339). +# ## The 3 options below only apply if no valid persister is read from memory or file (e.g. first run). +# # from_timestamp = +# # latest = true +# +# ## Set a custom prefetch count for the receiver(s) +# # prefetch_count = 1000 +# +# ## Add an epoch to the receiver(s) +# # epoch = 0 +# +# ## Change to set a custom user agent, "telegraf" is used by default +# # user_agent = "telegraf" +# +# ## To consume from a specific partition, set the partition_ids option. +# ## An empty array will result in receiving from all partitions. +# # partition_ids = ["0","1"] +# +# ## Max undelivered messages +# # max_undelivered_messages = 1000 +# +# ## Set either option below to true to use a system property as timestamp. +# ## You have the choice between EnqueuedTime and IoTHubEnqueuedTime. +# ## It is recommended to use this setting when the data itself has no timestamp. +# # enqueued_time_as_ts = true +# # iot_hub_enqueued_time_as_ts = true +# +# ## Tags or fields to create from keys present in the application property bag. +# ## These could for example be set by message enrichments in Azure IoT Hub. +# # application_property_tags = [] +# # application_property_fields = [] +# +# ## Tag or field name to use for metadata +# ## By default all metadata is disabled +# # sequence_number_field = "SequenceNumber" +# # enqueued_time_field = "EnqueuedTime" +# # offset_field = "Offset" +# # partition_id_tag = "PartitionID" +# # partition_key_tag = "PartitionKey" +# # iot_hub_device_connection_id_tag = "IoTHubDeviceConnectionID" +# # iot_hub_auth_generation_id_tag = "IoTHubAuthGenerationID" +# # iot_hub_connection_auth_method_tag = "IoTHubConnectionAuthMethod" +# # iot_hub_connection_module_id_tag = "IoTHubConnectionModuleID" +# # iot_hub_enqueued_time_field = "IoTHubEnqueuedTime" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Run executable as long-running input plugin +# [[inputs.execd]] +# ## Program to run as daemon +# command = ["telegraf-smartctl", "-d", "/dev/sda"] +# +# ## Define how the process is signaled on each collection interval. +# ## Valid values are: +# ## "none" : Do not signal anything. +# ## The process must output metrics by itself. +# ## "STDIN" : Send a newline on STDIN. +# ## "SIGHUP" : Send a HUP signal. Not available on Windows. +# ## "SIGUSR1" : Send a USR1 signal. 
Not available on Windows. +# ## "SIGUSR2" : Send a USR2 signal. Not available on Windows. +# signal = "none" +# +# ## Delay before the process is restarted after an unexpected termination +# restart_delay = "10s" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Accept metrics over InfluxDB 1.x HTTP API +# [[inputs.http_listener]] +# ## Address and port to host InfluxDB listener on +# service_address = ":8186" +# +# ## maximum duration before timing out read of the request +# read_timeout = "10s" +# ## maximum duration before timing out write of the response +# write_timeout = "10s" +# +# ## Maximum allowed HTTP request body size in bytes. +# ## 0 means to use the default of 32MiB. +# max_body_size = "32MiB" +# +# ## Optional tag name used to store the database. +# ## If the write has a database in the query string then it will be kept in this tag name. +# ## This tag can be used in downstream outputs. +# ## The default value of nothing means it will be off and the database will not be recorded. +# # database_tag = "" +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Add service certificate and key +# tls_cert = "/etc/telegraf/cert.pem" +# tls_key = "/etc/telegraf/key.pem" +# +# ## Optional username and password to accept for HTTP basic authentication. +# ## You probably want to make sure you have TLS configured above for this. +# # basic_username = "foobar" +# # basic_password = "barfoo" + + +# # Generic HTTP write listener +# [[inputs.http_listener_v2]] +# ## Address and port to host HTTP listener on +# service_address = ":8080" +# +# ## Path to listen to. +# # path = "/telegraf" +# +# ## HTTP methods to accept. +# # methods = ["POST", "PUT"] +# +# ## maximum duration before timing out read of the request +# # read_timeout = "10s" +# ## maximum duration before timing out write of the response +# # write_timeout = "10s" +# +# ## Maximum allowed http request body size in bytes. +# ## 0 means to use the default of 524,288,00 bytes (500 mebibytes) +# # max_body_size = "500MB" +# +# ## Part of the request to consume. Available options are "body" and +# ## "query". +# # data_source = "body" +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Add service certificate and key +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Optional username and password to accept for HTTP basic authentication. +# ## You probably want to make sure you have TLS configured above for this. +# # basic_username = "foobar" +# # basic_password = "barfoo" +# +# ## Data format to consume. 
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Accept metrics over InfluxDB 1.x HTTP API +# [[inputs.influxdb_listener]] +# ## Address and port to host InfluxDB listener on +# service_address = ":8186" +# +# ## maximum duration before timing out read of the request +# read_timeout = "10s" +# ## maximum duration before timing out write of the response +# write_timeout = "10s" +# +# ## Maximum allowed HTTP request body size in bytes. +# ## 0 means to use the default of 32MiB. +# max_body_size = "32MiB" +# +# ## Optional tag name used to store the database. +# ## If the write has a database in the query string then it will be kept in this tag name. +# ## This tag can be used in downstream outputs. +# ## The default value of nothing means it will be off and the database will not be recorded. +# # database_tag = "" +# +# ## Set one or more allowed client CA certificate file names to +# ## enable mutually authenticated TLS connections +# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Add service certificate and key +# tls_cert = "/etc/telegraf/cert.pem" +# tls_key = "/etc/telegraf/key.pem" +# +# ## Optional username and password to accept for HTTP basic authentication. +# ## You probably want to make sure you have TLS configured above for this. +# # basic_username = "foobar" +# # basic_password = "barfoo" + + +# # Read JTI OpenConfig Telemetry from listed sensors +# [[inputs.jti_openconfig_telemetry]] +# ## List of device addresses to collect telemetry from +# servers = ["localhost:1883"] +# +# ## Authentication details. Username and password are must if device expects +# ## authentication. Client ID must be unique when connecting from multiple instances +# ## of telegraf to the same device +# username = "user" +# password = "pass" +# client_id = "telegraf" +# +# ## Frequency to get data +# sample_frequency = "1000ms" +# +# ## Sensors to subscribe for +# ## A identifier for each sensor can be provided in path by separating with space +# ## Else sensor path will be used as identifier +# ## When identifier is used, we can provide a list of space separated sensors. +# ## A single subscription will be created with all these sensors and data will +# ## be saved to measurement with this identifier name +# sensors = [ +# "/interfaces/", +# "collection /components/ /lldp", +# ] +# +# ## We allow specifying sensor group level reporting rate. To do this, specify the +# ## reporting rate in Duration at the beginning of sensor paths / collection +# ## name. For entries without reporting rate, we use configured sample frequency +# sensors = [ +# "1000ms customReporting /interfaces /lldp", +# "2000ms collection /components", +# "/interfaces", +# ] +# +# ## Optional TLS Config +# # enable_tls = true +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Delay between retry attempts of failed RPC calls or streams. Defaults to 1000ms. +# ## Failed streams/calls will not be retried if 0 is provided +# retry_delay = "1000ms" +# +# ## To treat all string values as tags, set this to true +# str_as_tags = false + + +# # Read metrics from Kafka topics +# [[inputs.kafka_consumer]] +# ## Kafka brokers. +# brokers = ["localhost:9092"] +# +# ## Topics to consume. 
+# topics = ["telegraf"] +# +# ## When set this tag will be added to all metrics with the topic as the value. +# # topic_tag = "" +# +# ## Optional Client id +# # client_id = "Telegraf" +# +# ## Set the minimal supported Kafka version. Setting this enables the use of new +# ## Kafka features and APIs. Must be 0.10.2.0 or greater. +# ## ex: version = "1.1.0" +# # version = "" +# +# ## Optional TLS Config +# # enable_tls = true +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## SASL authentication credentials. These settings should typically be used +# ## with TLS encryption enabled using the "enable_tls" option. +# # sasl_username = "kafka" +# # sasl_password = "secret" +# +# ## SASL protocol version. When connecting to Azure EventHub set to 0. +# # sasl_version = 1 +# +# ## Name of the consumer group. +# # consumer_group = "telegraf_metrics_consumers" +# +# ## Initial offset position; one of "oldest" or "newest". +# # offset = "oldest" +# +# ## Consumer group partition assignment strategy; one of "range", "roundrobin" or "sticky". +# # balance_strategy = "range" +# +# ## Maximum length of a message to consume, in bytes (default 0/unlimited); +# ## larger messages are dropped +# max_message_len = 1000000 +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from Kafka topic(s) +# [[inputs.kafka_consumer_legacy]] +# ## topic(s) to consume +# topics = ["telegraf"] +# +# ## an array of Zookeeper connection strings +# zookeeper_peers = ["localhost:2181"] +# +# ## Zookeeper Chroot +# zookeeper_chroot = "" +# +# ## the name of the consumer group +# consumer_group = "telegraf_metrics_consumers" +# +# ## Offset (must be either "oldest" or "newest") +# offset = "oldest" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# +# ## Maximum length of a message to consume, in bytes (default 0/unlimited); +# ## larger messages are dropped +# max_message_len = 65536 + + +# # Configuration for the AWS Kinesis input. +# [[inputs.kinesis_consumer]] +# ## Amazon REGION of kinesis endpoint. 
+# region = "ap-southeast-2" +# +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile +# # access_key = "" +# # secret_key = "" +# # token = "" +# # role_arn = "" +# # profile = "" +# # shared_credential_file = "" +# +# ## Endpoint to make request against, the correct endpoint is automatically +# ## determined and this option should only be set if you wish to override the +# ## default. +# ## ex: endpoint_url = "http://localhost:8000" +# # endpoint_url = "" +# +# ## Kinesis StreamName must exist prior to starting telegraf. +# streamname = "StreamName" +# +# ## Shard iterator type (only 'TRIM_HORIZON' and 'LATEST' currently supported) +# # shard_iterator_type = "TRIM_HORIZON" +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# +# ## Optional +# ## Configuration for a dynamodb checkpoint +# [inputs.kinesis_consumer.checkpoint_dynamodb] +# ## unique name for this consumer +# app_name = "default" +# table_name = "default" + + +# # Read metrics off Arista LANZ, via socket +# [[inputs.lanz]] +# ## URL to Arista LANZ endpoint +# servers = [ +# "tcp://127.0.0.1:50001" +# ] + + +# # Stream and parse log file(s). +# [[inputs.logparser]] +# ## Log files to parse. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## /var/log/**.log -> recursively find all .log files in /var/log +# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +# ## /var/log/apache.log -> only tail the apache log file +# files = ["/var/log/apache/access.log"] +# +# ## Read files that currently exist from the beginning. Files that are created +# ## while telegraf is running (and that match the "files" globs) will always +# ## be read from the beginning. +# from_beginning = false +# +# ## Method used to watch for file updates. Can be either "inotify" or "poll". +# # watch_method = "inotify" +# +# ## Parse logstash-style "grok" patterns: +# [inputs.logparser.grok] +# ## This is a list of patterns to check the given log file(s) for. +# ## Note that adding patterns here increases processing time. The most +# ## efficient configuration is to have one pattern per logparser. +# ## Other common built-in patterns are: +# ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) +# ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) +# patterns = ["%{COMBINED_LOG_FORMAT}"] +# +# ## Name of the outputted measurement name. +# measurement = "apache_access_log" +# +# ## Full path(s) to custom pattern files. 
+# custom_pattern_files = [] +# +# ## Custom patterns can also be defined here. Put one pattern per line. +# custom_patterns = ''' +# ''' +# +# ## Timezone allows you to provide an override for timestamps that +# ## don't already include an offset +# ## e.g. 04/06/2016 12:41:45 data one two 5.43µs +# ## +# ## Default: "" which renders UTC +# ## Options are as follows: +# ## 1. Local -- interpret based on machine localtime +# ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +# ## 3. UTC -- or blank/unspecified, will return timestamp in UTC +# # timezone = "Canada/Eastern" +# +# ## When set to "disable", timestamp will not incremented if there is a +# ## duplicate. +# # unique_timestamp = "auto" + + +# # Read metrics from MQTT topic(s) +# [[inputs.mqtt_consumer]] +# ## MQTT broker URLs to be used. The format should be scheme://host:port, +# ## schema can be tcp, ssl, or ws. +# servers = ["tcp://127.0.0.1:1883"] +# +# ## Topics that will be subscribed to. +# topics = [ +# "telegraf/host01/cpu", +# "telegraf/+/mem", +# "sensors/#", +# ] +# +# ## The message topic will be stored in a tag specified by this value. If set +# ## to the empty string no topic tag will be created. +# # topic_tag = "topic" +# +# ## QoS policy for messages +# ## 0 = at most once +# ## 1 = at least once +# ## 2 = exactly once +# ## +# ## When using a QoS of 1 or 2, you should enable persistent_session to allow +# ## resuming unacknowledged messages. +# # qos = 0 +# +# ## Connection timeout for initial connection in seconds +# # connection_timeout = "30s" +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Persistent session disables clearing of the client session on connection. +# ## In order for this option to work you must also set client_id to identify +# ## the client. To receive messages that arrived while the client is offline, +# ## also set the qos option to 1 or 2 and don't forget to also set the QoS when +# ## publishing. +# # persistent_session = false +# +# ## If unset, a random client ID will be generated. +# # client_id = "" +# +# ## Username and password to connect MQTT server. +# # username = "telegraf" +# # password = "metricsmetricsmetricsmetrics" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to consume. 
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from NATS subject(s) +# [[inputs.nats_consumer]] +# ## urls of NATS servers +# servers = ["nats://localhost:4222"] +# +# ## subject(s) to consume +# subjects = ["telegraf"] +# +# ## name a queue group +# queue_group = "telegraf_consumers" +# +# ## Optional credentials +# # username = "" +# # password = "" +# +# ## Optional NATS 2.0 and NATS NGS compatible user credentials +# # credentials = "/etc/telegraf/nats.creds" +# +# ## Use Transport Layer Security +# # secure = false +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Sets the limits for pending msgs and bytes for each subscription +# ## These shouldn't need to be adjusted except in very high throughput scenarios +# # pending_message_limit = 65536 +# # pending_bytes_limit = 67108864 +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read NSQ topic for metrics. +# [[inputs.nsq_consumer]] +# ## Server option still works but is deprecated, we just prepend it to the nsqd array. +# # server = "localhost:4150" +# +# ## An array representing the NSQD TCP HTTP Endpoints +# nsqd = ["localhost:4150"] +# +# ## An array representing the NSQLookupd HTTP Endpoints +# nsqlookupd = ["localhost:4161"] +# topic = "telegraf" +# channel = "consumer" +# max_in_flight = 100 +# +# ## Maximum messages to read from the broker that have not been written by an +# ## output. For best throughput set based on the number of metrics within +# ## each message and the size of the output's metric_batch_size. +# ## +# ## For example, if each message from the queue contains 10 metrics and the +# ## output metric_batch_size is 1000, setting this to 100 will ensure that a +# ## full batch is collected and the write is triggered immediately without +# ## waiting until the next flush_interval. +# # max_undelivered_messages = 1000 +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from one or many pgbouncer servers +# [[inputs.pgbouncer]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest password=... sslmode=... 
dbname=app_production +# ## +# ## All connection parameters are optional. +# ## +# address = "host=localhost user=pgbouncer sslmode=disable" + + +# # Read metrics from one or many postgresql servers +# [[inputs.postgresql]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest password=... sslmode=... dbname=app_production +# ## +# ## All connection parameters are optional. +# ## +# ## Without the dbname parameter, the driver will default to a database +# ## with the same name as the user. This dbname is just for instantiating a +# ## connection with the server and doesn't restrict the databases we are trying +# ## to grab metrics for. +# ## +# address = "host=localhost user=postgres sslmode=disable" +# ## A custom name for the database that will be used as the "server" tag in the +# ## measurement output. If not specified, a default one generated from +# ## the connection address is used. +# # outputaddress = "db01" +# +# ## connection configuration. +# ## maxlifetime - specify the maximum lifetime of a connection. +# ## default is forever (0s) +# max_lifetime = "0s" +# +# ## A list of databases to explicitly ignore. If not specified, metrics for all +# ## databases are gathered. Do NOT use with the 'databases' option. +# # ignored_databases = ["postgres", "template0", "template1"] +# +# ## A list of databases to pull metrics about. If not specified, metrics for all +# ## databases are gathered. Do NOT use with the 'ignored_databases' option. +# # databases = ["app_production", "testing"] + + +# # Read metrics from one or many postgresql servers +# [[inputs.postgresql_extensible]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest password=... sslmode=... dbname=app_production +# # +# ## All connection parameters are optional. # +# ## Without the dbname parameter, the driver will default to a database +# ## with the same name as the user. This dbname is just for instantiating a +# ## connection with the server and doesn't restrict the databases we are trying +# ## to grab metrics for. +# # +# address = "host=localhost user=postgres sslmode=disable" +# +# ## connection configuration. +# ## maxlifetime - specify the maximum lifetime of a connection. +# ## default is forever (0s) +# max_lifetime = "0s" +# +# ## A list of databases to pull metrics about. If not specified, metrics for all +# ## databases are gathered. +# ## databases = ["app_production", "testing"] +# # +# ## A custom name for the database that will be used as the "server" tag in the +# ## measurement output. If not specified, a default one generated from +# ## the connection address is used. +# # outputaddress = "db01" +# # +# ## Define the toml config where the sql queries are stored +# ## New queries can be added, if the withdbname is set to true and there is no +# ## databases defined in the 'databases field', the sql query is ended by a +# ## 'is not null' in order to make the query succeed. +# ## Example : +# ## The sqlquery : "SELECT * FROM pg_stat_database where datname" become +# ## "SELECT * FROM pg_stat_database where datname IN ('postgres', 'pgbench')" +# ## because the databases variable was set to ['postgres', 'pgbench' ] and the +# ## withdbname was true. 
Be careful that if the withdbname is set to false you +# ## don't have to define the where clause (aka with the dbname) the tagvalue +# ## field is used to define custom tags (separated by commas) +# ## The optional "measurement" value can be used to override the default +# ## output measurement name ("postgresql"). +# ## +# ## The script option can be used to specify the .sql file path. +# ## If script and sqlquery options specified at same time, sqlquery will be used +# ## +# ## Structure : +# ## [[inputs.postgresql_extensible.query]] +# ## sqlquery string +# ## version string +# ## withdbname boolean +# ## tagvalue string (comma separated) +# ## measurement string +# [[inputs.postgresql_extensible.query]] +# sqlquery="SELECT * FROM pg_stat_database" +# version=901 +# withdbname=false +# tagvalue="" +# measurement="" +# [[inputs.postgresql_extensible.query]] +# sqlquery="SELECT * FROM pg_stat_bgwriter" +# version=901 +# withdbname=false +# tagvalue="postgresql.stats" + + +# # Read metrics from one or many prometheus clients +# [[inputs.prometheus]] +# ## An array of urls to scrape metrics from. +# urls = ["http://localhost:9100/metrics"] +# +# ## Metric version controls the mapping from Prometheus metrics into +# ## Telegraf metrics. When using the prometheus_client output, use the same +# ## value in both plugins to ensure metrics are round-tripped without +# ## modification. +# ## +# ## example: metric_version = 1; deprecated in 1.13 +# ## metric_version = 2; recommended version +# # metric_version = 1 +# +# ## Url tag name (tag containing scrapped url. optional, default is "url") +# # url_tag = "scrapeUrl" +# +# ## An array of Kubernetes services to scrape metrics from. +# # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] +# +# ## Kubernetes config file to create client from. +# # kube_config = "/path/to/kubernetes.config" +# +# ## Scrape Kubernetes pods for the following prometheus annotations: +# ## - prometheus.io/scrape: Enable scraping for this pod +# ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to +# ## set this to 'https' & most likely set the tls config. +# ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. +# ## - prometheus.io/port: If port is not 9102 use this annotation +# # monitor_kubernetes_pods = true +# ## Restricts Kubernetes monitoring to a single namespace +# ## ex: monitor_kubernetes_pods_namespace = "default" +# # monitor_kubernetes_pods_namespace = "" +# # label selector to target pods which have the label +# # kubernetes_label_selector = "env=dev,app=nginx" +# # field selector to target pods +# # eg. To scrape pods on a specific node +# # kubernetes_field_selector = "spec.nodeName=$HOSTNAME" +# +# ## Use bearer token for authorization. ('bearer_token' takes priority) +# # bearer_token = "/path/to/bearer/token" +# ## OR +# # bearer_token_string = "abc_123" +# +# ## HTTP Basic Authentication username and password. ('bearer_token' and +# ## 'bearer_token_string' take priority) +# # username = "" +# # password = "" +# +# ## Specify timeout duration for slower prometheus clients (default is 3s) +# # response_timeout = "3s" +# +# ## Optional TLS Config +# # tls_ca = /path/to/cafile +# # tls_cert = /path/to/certfile +# # tls_key = /path/to/keyfile +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false + + +# # SFlow V5 Protocol Listener +# [[inputs.sflow]] +# ## Address to listen for sFlow packets. 
+# ## example: service_address = "udp://:6343" +# ## service_address = "udp4://:6343" +# ## service_address = "udp6://:6343" +# service_address = "udp://:6343" +# +# ## Set the size of the operating system's receive buffer. +# ## example: read_buffer_size = "64KiB" +# # read_buffer_size = "" + + +# # Receive SNMP traps +# [[inputs.snmp_trap]] +# ## Transport, local address, and port to listen on. Transport must +# ## be "udp://". Omit local address to listen on all interfaces. +# ## example: "udp://127.0.0.1:1234" +# ## +# ## Special permissions may be required to listen on a port less than +# ## 1024. See README.md for details +# ## +# # service_address = "udp://:162" +# ## Timeout running snmptranslate command +# # timeout = "5s" + + +# # Generic socket listener capable of handling multiple socket types. +# [[inputs.socket_listener]] +# ## URL to listen on +# # service_address = "tcp://:8094" +# # service_address = "tcp://127.0.0.1:http" +# # service_address = "tcp4://:8094" +# # service_address = "tcp6://:8094" +# # service_address = "tcp6://[2001:db8::1]:8094" +# # service_address = "udp://:8094" +# # service_address = "udp4://:8094" +# # service_address = "udp6://:8094" +# # service_address = "unix:///tmp/telegraf.sock" +# # service_address = "unixgram:///tmp/telegraf.sock" +# +# ## Change the file mode bits on unix sockets. These permissions may not be +# ## respected by some platforms, to safely restrict write permissions it is best +# ## to place the socket into a directory that has previously been created +# ## with the desired permissions. +# ## ex: socket_mode = "777" +# # socket_mode = "" +# +# ## Maximum number of concurrent connections. +# ## Only applies to stream sockets (e.g. TCP). +# ## 0 (default) is unlimited. +# # max_connections = 1024 +# +# ## Read timeout. +# ## Only applies to stream sockets (e.g. TCP). +# ## 0 (default) is unlimited. +# # read_timeout = "30s" +# +# ## Optional TLS configuration. +# ## Only applies to stream sockets (e.g. TCP). +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Enables client authentication if set. +# # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] +# +# ## Maximum socket buffer size (in bytes when no unit specified). +# ## For stream sockets, once the buffer fills up, the sender will start backing up. +# ## For datagram sockets, once the buffer fills up, metrics will start dropping. +# ## Defaults to the OS default. +# # read_buffer_size = "64KiB" +# +# ## Period between keep alive probes. +# ## Only applies to TCP sockets. +# ## 0 disables keep alive probes. +# ## Defaults to the OS configuration. +# # keep_alive_period = "5m" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# # data_format = "influx" +# +# ## Content encoding for message payloads, can be set to "gzip" to or +# ## "identity" to apply no encoding. +# # content_encoding = "identity" + + +# # Statsd UDP/TCP Server +# [[inputs.statsd]] +# ## Protocol, must be "tcp", "udp", "udp4" or "udp6" (default=udp) +# protocol = "udp" +# +# ## MaxTCPConnection - applicable when protocol is set to tcp (default=250) +# max_tcp_connections = 250 +# +# ## Enable TCP keep alive probes (default=false) +# tcp_keep_alive = false +# +# ## Specifies the keep-alive period for an active network connection. 
+# ## Only applies to TCP sockets and will be ignored if tcp_keep_alive is false. +# ## Defaults to the OS configuration. +# # tcp_keep_alive_period = "2h" +# +# ## Address and port to host UDP listener on +# service_address = ":8125" +# +# ## The following configuration options control when telegraf clears it's cache +# ## of previous values. If set to false, then telegraf will only clear it's +# ## cache when the daemon is restarted. +# ## Reset gauges every interval (default=true) +# delete_gauges = true +# ## Reset counters every interval (default=true) +# delete_counters = true +# ## Reset sets every interval (default=true) +# delete_sets = true +# ## Reset timings & histograms every interval (default=true) +# delete_timings = true +# +# ## Percentiles to calculate for timing & histogram stats +# percentiles = [50.0, 90.0, 99.0, 99.9, 99.95, 100.0] +# +# ## separator to use between elements of a statsd metric +# metric_separator = "_" +# +# ## Parses tags in the datadog statsd format +# ## http://docs.datadoghq.com/guides/dogstatsd/ +# parse_data_dog_tags = false +# +# ## Parses datadog extensions to the statsd format +# datadog_extensions = false +# +# ## Statsd data translation templates, more info can be read here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md +# # templates = [ +# # "cpu.* measurement*" +# # ] +# +# ## Number of UDP messages allowed to queue up, once filled, +# ## the statsd server will start dropping packets +# allowed_pending_messages = 10000 +# +# ## Number of timing/histogram values to track per-measurement in the +# ## calculation of percentiles. Raising this limit increases the accuracy +# ## of percentiles but also increases the memory usage and cpu time. +# percentile_limit = 1000 + + +# # Suricata stats plugin +# [[inputs.suricata]] +# ## Data sink for Suricata stats log +# # This is expected to be a filename of a +# # unix socket to be created for listening. +# source = "/var/run/suricata-stats.sock" +# +# # Delimiter for flattening field keys, e.g. subitem "alert" of "detect" +# # becomes "detect_alert" when delimiter is "_". +# delimiter = "_" + + +# # Accepts syslog messages following RFC5424 format with transports as per RFC5426, RFC5425, or RFC6587 +# [[inputs.syslog]] +# ## Specify an ip or hostname with port - eg., tcp://localhost:6514, tcp://10.0.0.1:6514 +# ## Protocol, address and port to host the syslog receiver. +# ## If no host is specified, then localhost is used. +# ## If no port is specified, 6514 is used (RFC5425#section-4.1). +# server = "tcp://:6514" +# +# ## TLS Config +# # tls_allowed_cacerts = ["/etc/telegraf/ca.pem"] +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# +# ## Period between keep alive probes. +# ## 0 disables keep alive probes. +# ## Defaults to the OS configuration. +# ## Only applies to stream sockets (e.g. TCP). +# # keep_alive_period = "5m" +# +# ## Maximum number of concurrent connections (default = 0). +# ## 0 means unlimited. +# ## Only applies to stream sockets (e.g. TCP). +# # max_connections = 1024 +# +# ## Read timeout is the maximum time allowed for reading a single message (default = 5s). +# ## 0 means unlimited. +# # read_timeout = "5s" +# +# ## The framing technique with which it is expected that messages are transported (default = "octet-counting"). +# ## Whether the messages come using the octect-counting (RFC5425#section-4.3.1, RFC6587#section-3.4.1), +# ## or the non-transparent framing technique (RFC6587#section-3.4.2). 
+# ## Must be one of "octet-counting", "non-transparent". +# # framing = "octet-counting" +# +# ## The trailer to be expected in case of non-trasparent framing (default = "LF"). +# ## Must be one of "LF", or "NUL". +# # trailer = "LF" +# +# ## Whether to parse in best effort mode or not (default = false). +# ## By default best effort parsing is off. +# # best_effort = false +# +# ## Character to prepend to SD-PARAMs (default = "_"). +# ## A syslog message can contain multiple parameters and multiple identifiers within structured data section. +# ## Eg., [id1 name1="val1" name2="val2"][id2 name1="val1" nameA="valA"] +# ## For each combination a field is created. +# ## Its name is created concatenating identifier, sdparam_separator, and parameter name. +# # sdparam_separator = "_" + + +# # Stream a log file, like the tail -f command +# [[inputs.tail]] +# ## files to tail. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## "/var/log/**.log" -> recursively find all .log files in /var/log +# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log +# ## "/var/log/apache.log" -> just tail the apache log file +# ## +# ## See https://github.com/gobwas/glob for more examples +# ## +# files = ["/var/mymetrics.out"] +# ## Read file from beginning. +# from_beginning = false +# ## Whether file is a named pipe +# pipe = false +# +# ## Method used to watch for file updates. Can be either "inotify" or "poll". +# # watch_method = "inotify" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Generic TCP listener +# [[inputs.tcp_listener]] +# # DEPRECATED: the TCP listener plugin has been deprecated in favor of the +# # socket_listener plugin +# # see https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener + + +# # Generic UDP listener +# [[inputs.udp_listener]] +# # DEPRECATED: the TCP listener plugin has been deprecated in favor of the +# # socket_listener plugin +# # see https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener + + +# # Read metrics from VMware vCenter +# [[inputs.vsphere]] +# ## List of vCenter URLs to be monitored. These three lines must be uncommented +# ## and edited for the plugin to work. 
+# vcenters = [ "https://vcenter.local/sdk" ] +# username = "user@corp.local" +# password = "secret" +# +# ## VMs +# ## Typical VM metrics (if omitted or empty, all metrics are collected) +# # vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected) +# # vm_exclude = [] # Inventory paths to exclude +# vm_metric_include = [ +# "cpu.demand.average", +# "cpu.idle.summation", +# "cpu.latency.average", +# "cpu.readiness.average", +# "cpu.ready.summation", +# "cpu.run.summation", +# "cpu.usagemhz.average", +# "cpu.used.summation", +# "cpu.wait.summation", +# "mem.active.average", +# "mem.granted.average", +# "mem.latency.average", +# "mem.swapin.average", +# "mem.swapinRate.average", +# "mem.swapout.average", +# "mem.swapoutRate.average", +# "mem.usage.average", +# "mem.vmmemctl.average", +# "net.bytesRx.average", +# "net.bytesTx.average", +# "net.droppedRx.summation", +# "net.droppedTx.summation", +# "net.usage.average", +# "power.power.average", +# "virtualDisk.numberReadAveraged.average", +# "virtualDisk.numberWriteAveraged.average", +# "virtualDisk.read.average", +# "virtualDisk.readOIO.latest", +# "virtualDisk.throughput.usage.average", +# "virtualDisk.totalReadLatency.average", +# "virtualDisk.totalWriteLatency.average", +# "virtualDisk.write.average", +# "virtualDisk.writeOIO.latest", +# "sys.uptime.latest", +# ] +# # vm_metric_exclude = [] ## Nothing is excluded by default +# # vm_instances = true ## true by default +# +# ## Hosts +# ## Typical host metrics (if omitted or empty, all metrics are collected) +# # host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected) +# # host_exclude [] # Inventory paths to exclude +# host_metric_include = [ +# "cpu.coreUtilization.average", +# "cpu.costop.summation", +# "cpu.demand.average", +# "cpu.idle.summation", +# "cpu.latency.average", +# "cpu.readiness.average", +# "cpu.ready.summation", +# "cpu.swapwait.summation", +# "cpu.usage.average", +# "cpu.usagemhz.average", +# "cpu.used.summation", +# "cpu.utilization.average", +# "cpu.wait.summation", +# "disk.deviceReadLatency.average", +# "disk.deviceWriteLatency.average", +# "disk.kernelReadLatency.average", +# "disk.kernelWriteLatency.average", +# "disk.numberReadAveraged.average", +# "disk.numberWriteAveraged.average", +# "disk.read.average", +# "disk.totalReadLatency.average", +# "disk.totalWriteLatency.average", +# "disk.write.average", +# "mem.active.average", +# "mem.latency.average", +# "mem.state.latest", +# "mem.swapin.average", +# "mem.swapinRate.average", +# "mem.swapout.average", +# "mem.swapoutRate.average", +# "mem.totalCapacity.average", +# "mem.usage.average", +# "mem.vmmemctl.average", +# "net.bytesRx.average", +# "net.bytesTx.average", +# "net.droppedRx.summation", +# "net.droppedTx.summation", +# "net.errorsRx.summation", +# "net.errorsTx.summation", +# "net.usage.average", +# "power.power.average", +# "storageAdapter.numberReadAveraged.average", +# "storageAdapter.numberWriteAveraged.average", +# "storageAdapter.read.average", +# "storageAdapter.write.average", +# "sys.uptime.latest", +# ] +# ## Collect IP addresses? 
Valid values are "ipv4" and "ipv6" +# # ip_addresses = ["ipv6", "ipv4" ] +# +# # host_metric_exclude = [] ## Nothing excluded by default +# # host_instances = true ## true by default +# +# +# ## Clusters +# # cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) +# # cluster_exclude = [] # Inventory paths to exclude +# # cluster_metric_include = [] ## if omitted or empty, all metrics are collected +# # cluster_metric_exclude = [] ## Nothing excluded by default +# # cluster_instances = false ## false by default +# +# ## Datastores +# # datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected) +# # datastore_exclude = [] # Inventory paths to exclude +# # datastore_metric_include = [] ## if omitted or empty, all metrics are collected +# # datastore_metric_exclude = [] ## Nothing excluded by default +# # datastore_instances = false ## false by default +# +# ## Datacenters +# # datacenter_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) +# # datacenter_exclude = [] # Inventory paths to exclude +# datacenter_metric_include = [] ## if omitted or empty, all metrics are collected +# datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default. +# # datacenter_instances = false ## false by default +# +# ## Plugin Settings +# ## separator character to use for measurement and field names (default: "_") +# # separator = "_" +# +# ## number of objects to retreive per query for realtime resources (vms and hosts) +# ## set to 64 for vCenter 5.5 and 6.0 (default: 256) +# # max_query_objects = 256 +# +# ## number of metrics to retreive per query for non-realtime resources (clusters and datastores) +# ## set to 64 for vCenter 5.5 and 6.0 (default: 256) +# # max_query_metrics = 256 +# +# ## number of go routines to use for collection and discovery of objects and metrics +# # collect_concurrency = 1 +# # discover_concurrency = 1 +# +# ## the interval before (re)discovering objects subject to metrics collection (default: 300s) +# # object_discovery_interval = "300s" +# +# ## timeout applies to any of the api request made to vcenter +# # timeout = "60s" +# +# ## When set to true, all samples are sent as integers. This makes the output +# ## data types backwards compatible with Telegraf 1.9 or lower. Normally all +# ## samples from vCenter, with the exception of percentages, are integer +# ## values, but under some conditions, some averaging takes place internally in +# ## the plugin. Setting this flag to "false" will send values as floats to +# ## preserve the full precision when averaging takes place. +# # use_int_samples = true +# +# ## Custom attributes from vCenter can be very useful for queries in order to slice the +# ## metrics along different dimension and for forming ad-hoc relationships. They are disabled +# ## by default, since they can add a considerable amount of tags to the resulting metrics. To +# ## enable, simply set custom_attribute_exlude to [] (empty set) and use custom_attribute_include +# ## to select the attributes you want to include. +# ## By default, since they can add a considerable amount of tags to the resulting metrics. To +# ## enable, simply set custom_attribute_exlude to [] (empty set) and use custom_attribute_include +# ## to select the attributes you want to include. 
+# # custom_attribute_include = [] +# # custom_attribute_exclude = ["*"] +# +# ## Optional SSL Config +# # ssl_ca = "/path/to/cafile" +# # ssl_cert = "/path/to/certfile" +# # ssl_key = "/path/to/keyfile" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false + + +# # A Webhooks Event collector +# [[inputs.webhooks]] +# ## Address and port to host Webhook listener on +# service_address = ":1619" +# +# [inputs.webhooks.filestack] +# path = "/filestack" +# +# [inputs.webhooks.github] +# path = "/github" +# # secret = "" +# +# [inputs.webhooks.mandrill] +# path = "/mandrill" +# +# [inputs.webhooks.rollbar] +# path = "/rollbar" +# +# [inputs.webhooks.papertrail] +# path = "/papertrail" +# +# [inputs.webhooks.particle] +# path = "/particle" + + +# # This plugin implements the Zipkin http server to gather trace and timing data needed to troubleshoot latency problems in microservice architectures. +# [[inputs.zipkin]] +# # path = "/api/v1/spans" # URL path for span data +# # port = 9411 # Port on which Telegraf listens + diff --git a/evaluation/eval-runner/eval-runner/case/performance_analysis/utils.py b/evaluation/eval-runner/eval-runner/case/performance_analysis/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..afe1cd3f62a7e38e245fe33645897b76418d900e --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/performance_analysis/utils.py @@ -0,0 +1,187 @@ +import re,time +from common.utils import ssh_pool_default +from eval_lib.common.ssh import SSHPool +from eval_lib.databases.influx.influx_db import InfulxDB +from common.const import TELEGRAF_TABLE_NAME_IN_INFLUX +from eval_lib.common.logger import get_logger + +log = get_logger() + +def format_latency(time_str, target_unit): + units = {'us': 0.000001, 'µs': 0.000001, 'ms': 0.001, 's': 1} + time_str = time_str.strip() + try: + pattern = r"[^\d]*$" + match = re.search(pattern, time_str) + matched_position = match.start() + time_value, current_unit = time_str[:matched_position], match.group() + time_value = float(time_value) + + if current_unit not in units: + log.error(f"Invalid current time unit: {current_unit}") + return None + converted_time = round( + time_value * units[current_unit] / units[target_unit], 3 + ) + return str(converted_time) + target_unit + + except ValueError as e: + log.info(f"Error: {e}") + return None + +def get_traffic_tool_data( + vm_ip, ssh_pool: SSHPool=ssh_pool_default +): + result = {} + ssh_client = ssh_pool.get(vm_ip) + cmd = "cat traffic_result.log " + _, stdout, stderr = ssh_client.exec_command(cmd) + logs = stdout.readlines() + try: + if logs: + result["server.lantency_p50"] = format_latency( + logs[0].split()[0], "ms" + ) + result["server.lantency_p90"] = format_latency( + logs[1].split()[0], "ms" + ) + result["server.rps"] = logs[2].split()[0] + err = stderr.readlines() + if err: + log.error(f"cat log err :{err}") + assert False + except Exception as e: + log.error(f"no found log :{e}") + assert False + return result + +def reload_telegraf_conf(vm_ip, ssh_pool: SSHPool=ssh_pool_default): + ssh_client = ssh_pool.get(vm_ip) + command = "sudo mv telegraf.conf /etc/telegraf/telegraf.conf && sudo systemctl restart telegraf && sudo systemctl status telegraf" + _, stdout, stderr = ssh_client.exec_command(command) + output = stdout.read().decode() + if "Active: active (running)" in output: + log.info(f"telegraf restarted successfully and is running") + return True + else: + log.error( + f"telegraf restart failed, err: {stderr.read().decode()}" + ) + return 
False + +def get_total_memory_Kbyte(vm_ip, ssh_pool: SSHPool=ssh_pool_default): + ssh_client = ssh_pool.get(vm_ip) + _, stdout, stderr = ssh_client.exec_command("free |awk '/Mem/{print $2}'") + total_mem = stdout.read().decode().strip() + err = stderr.read().decode() + if err: + log.error(f"get total memory Byte err: {err}") + return int(total_mem) + +def get_process_usage_by_telegraf(vm_ip, process_name_list, start_time, end_time): + ''' + 获取进程cpu/mem在一段时间内的90th的使用率 + return {'{process_name}_max_cpu_usage': 10.0, '{process_name}_max_mem_usage': 10.0} + ''' + influx_db = InfulxDB( + host=vm_ip, + database=TELEGRAF_TABLE_NAME_IN_INFLUX, + ) + procstat_data = {} + # memory unit Mb + total_memory = get_total_memory_Kbyte(vm_ip) + for process_name in process_name_list: + procstat = influx_db.get_procstat_result(process_name, start_time, end_time) + # 内存百分比转换为Mb + mem_Mbyte = float(procstat["max_mem_usage"]) * total_memory / 100 / 1024 + if "-agent" in process_name: + key = "agent" + else: + key = process_name.replace("-", "_").replace(".", "_") + log.info(f"add key: {key}") + procstat_data[f"{key}.max_cpu"] = "{:.2f}%".format(procstat["max_cpu_usage"]) + procstat_data[f"{key}.max_mem"] = "{:.2f}Mb".format(mem_Mbyte) + return procstat_data + +def install_istio( + vm_ip, ssh_pool: SSHPool=ssh_pool_default +): + ssh = ssh_pool.get(vm_ip) + cmd1 = "sudo istio-1.17.1/bin/istioctl install --set profile=demo -y --set components.cni.enabled=true" + log.info(f"exec cmd: {cmd1}") + stdin, stdout, stderr = ssh.exec_command(cmd1) + log.info(stdout.readlines()) + err = stderr.readlines() + log.error(err) + if any("5m0s" in e for e in err): + cmd_tmp = "kubectl get pod -n istio-system|awk 'NR>1{print $3}'" + start_time = time.time() + end_time = start_time + 30 * 60 + while True: + stdin, stdout, stderr = ssh.exec_command(cmd_tmp) + logs = stdout.readlines() + log.info(f"istio_pod_status is {logs}") + if all("Running" in pod_status for pod_status in logs): + break + elif time.time() > end_time: + assert False + else: + time.sleep(30) + cmd2 = '''sudo kubectl label namespace default istio-injection=enabled && \ + sudo kubectl apply -f istio-1.17.1/samples/bookinfo/platform/kube/bookinfo.yaml''' + log.info(f"exec cmd: {cmd2}") + stdin, stdout, stderr = ssh.exec_command(cmd2) + log.info(stdout.readlines()) + err = stderr.readlines() + if err: + log.info(err) + assert False + +def init_istio( + vm_ip, ssh_pool: SSHPool=ssh_pool_default +): + ssh = ssh_pool.get(vm_ip) + i = 0 + loop_count=60 + while True: + log.info( + 'Wait for istio service status to be normal,about 300s, timeout is 600' + ) + stdin, stdout, stderr = ssh.exec_command('kubectl get pods') + logs = stdout.readlines() + log.info(logs) + res = True + for k in logs[1:]: + log.info("get pod ========= > {}".format(k)) + if 'Running' not in k.split()[2] or '2/2' not in k.split()[1]: + res = False + break + if res == True: + log.info('istio services is normal') + break + else: + if i >= loop_count: + assert False + i += 1 + time.sleep(10) + cmd = '''sudo kubectl exec "$(sudo kubectl get pod -l app=ratings -o jsonpath='{.items[0].metadata.name}')" -c ratings -- curl -sS productpage:9080/productpage | grep -o ".*" && \ + sudo kubectl apply -f istio-1.17.1/samples/bookinfo/networking/bookinfo-gateway.yaml && \ + sudo istio-1.17.1/bin/istioctl analyze''' + log.info(f"exec cmd: {cmd}") + stdin, stdout, stderr = ssh.exec_command(cmd) + log.info(stdout.readlines()) + log.error(stderr.readlines()) + +def get_istio_productpage_server_port(vm_ip, ssh_pool: 
SSHPool=ssh_pool_default): + ssh_client = ssh_pool.get(vm_ip) + _, stdout, stderr = ssh_client.exec_command("sudo kubectl get svc istio-ingressgateway -nistio-system | awk -F'[:,/]' '/80/{print $5}'") + output = stdout.read().decode() + if output: + log.info(f"get port success") + port = output.strip() + return port + else: + log.error( + f"get port failed, err: {stderr.read().decode()}" + ) + assert False \ No newline at end of file diff --git a/evaluation/eval-runner/eval-runner/case/runner_test/test_print.py b/evaluation/eval-runner/eval-runner/case/runner_test/test_print.py new file mode 100644 index 0000000000000000000000000000000000000000..aec2be1487fca99d92a38ed946eafcc6d930cbcb --- /dev/null +++ b/evaluation/eval-runner/eval-runner/case/runner_test/test_print.py @@ -0,0 +1,50 @@ +import pytest +import allure,time +from common.utils import step as allure_step +from eval_lib.common.logger import get_logger +from common.results import AgentResults + +case_info={} +case_name = "performance_analysis_nginx_http_with_agent" + +log = get_logger() +class TestPrint(): + + @classmethod + def setup_class(cls): + cls.result = AgentResults(case_name=case_name) + cls.result.add_case_info(info=case_info) + pass + + @classmethod + def teardown_class(cls): + cls.result.generate_yaml_file() + pass + + @allure.suite('performance analysis') + @allure.epic('Agent performance analysis') + @allure.feature('') + @allure.title('Agent性能分析 - http') + @allure.description('Test the performance of the agent on the http protocol') + @pytest.mark.medium + def test_print(self): + result_data = { + "agent.max_cpu": "1.2%", + "agent.max_mem": "132MB" + } + with allure_step('step 1: create instance'): + self.result.add_result_data(data=result_data) + log.info("gogogo") + count = 2 + for i in range(count): + log.info(f"no {i}: test") + time.sleep(10) + log.info('successful') + + with allure_step('step 2: create instance'): + log.info("gogogo") + count = 2 + for i in range(count): + log.info(f"no {i}: test2") + time.sleep(10) + log.info('successful3') diff --git a/evaluation/eval-runner/eval-runner/common/client.py b/evaluation/eval-runner/eval-runner/common/client.py new file mode 100644 index 0000000000000000000000000000000000000000..84a7405122860ce51621b25ae16fbb27b11a5866 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/common/client.py @@ -0,0 +1,68 @@ +import requests +import threading +import time +import json +from eval_lib.model.const import RESULT_TYPE_LOG_RAW +from eval_lib.common.logger import get_logger +log = get_logger() + + +class ResultClient(): + # 将测试结果文件 传输到controller + def __init__(self, server_url) : + self.server_url = server_url + + def send_result_zip(self, zip_file_path): + try: + with open(zip_file_path, 'rb') as file: + files = {'file': file} + response = requests.post(f'{self.server_url}', files=files) + if response.status_code == 200: + log.info("Result files uploaded successfully!") + else: + log.error(f"Upload failed: {response.text}") + except Exception as e: + log.error(f"Upload failed: {e}") + + +class LogClient(threading.Thread): + # 将测试过程log 传输到controller + def __init__(self, uuid, log_file, server_url): + super().__init__() + self.uuid = uuid + self.log_file = log_file + self.server_url = server_url + self.last_position = 0 + self._stop_event = threading.Event() + + def stop(self): + self._stop_event.set() + + def send_log(self): + headers = { + 'Content-Type': 'application/json' + } + with open(self.log_file, 'r') as file: + file.seek(self.last_position) # 定位到上一次发送的位置 + 
new_log_data = file.read() + payload = json.dumps({ + "uuid": self.uuid, + "type": RESULT_TYPE_LOG_RAW, + "data": new_log_data + }) + if new_log_data: + response = requests.request("POST", self.server_url, headers=headers, data=payload) + if response.status_code == 200: + log.info("New log sent successfully.") + # 更新上一次发送的位置 + self.last_position = file.tell() + else: + log.error(f"Failed to send new log error:{response.text}") + else: + log.info("No new log to send.") + + def run(self): + while not self._stop_event.is_set(): + self.send_log() + time.sleep(5) + self.send_log() \ No newline at end of file diff --git a/evaluation/eval-runner/eval-runner/common/config.py b/evaluation/eval-runner/eval-runner/common/config.py new file mode 100644 index 0000000000000000000000000000000000000000..2ce473f11e36fbbcb85867003f71e86a95bb7efe --- /dev/null +++ b/evaluation/eval-runner/eval-runner/common/config.py @@ -0,0 +1,76 @@ +import yaml +from common.const import RUNNER_CONFIG_PATH +from eval_lib.common.logger import get_logger +from eval_lib.model.base import CaseParams + +log = get_logger() + + +class CaseConf(): + + def __init__(self): + self.agent_tools = {} + self.platform_tools = {} + self.runner_data_dir = None + self.listen_port = None + self.case_params: CaseParams = None + self.parse() + + def parse(self): + try: + with open(RUNNER_CONFIG_PATH, 'r') as f: + yml:dict = yaml.safe_load(f) + self.listen_port = yml.get('listen_port', 10083) + self.agent_tools = yml.get("agent-tools") + self.platform_tools = yml.get("platform-tools") + self.runner_data_dir = yml.get("runner_data_dir") + self.global_ssh_port = yml.get('global_ssh_port', 22) + self.global_ssh_username = yml.get('global_ssh_username', "") + self.global_ssh_password = yml.get('global_ssh_password', "") + self.fixed_host = yml.get('fixed_host', "") + self.case_params = self.parse_case_params(yml) + self.parse_mysql(yml) + self.parse_redis(yml) + except Exception as e: + log.error(f"file:eval-runner.yaml, yaml parser Error: {e}") + + def parse_case_params(self, yml: dict) -> CaseParams: + case_params: dict = yml.get("case_params") + return CaseParams(case_params) + + def parse_mysql(self, yml): + self.mysql = yml.get("mysql") + self.mysql_host = self.mysql.get("host", "127.0.0.1") + self.mysql_port = self.mysql.get("port", 3306) + self.mysql_user = self.mysql.get("user", "root") + self.mysql_password = self.mysql.get("password", "deepflow") + self.mysql_db = self.mysql.get("db", "evaluation") + + def parse_redis(self, yml): + self.redis = yml.get("redis") + self.redis_host = self.redis.get("host", "127.0.0.1") + self.redis_port = self.redis.get("port", 6379) + self.redis_password = self.redis.get("password", "root") + self.redis_db = self.redis.get("db", "0") + + + def is_valid(self): + if not self.agent_tools: + log.error("agent-tools is empty") + assert False + if not self.platform_tools: + log.error("platform-tools is empty") + assert False + if not self.runner_data_dir: + log.error("runner_data_dir is empty") + assert False + if not self.case_params: + log.error("case_params is empty") + assert False + if not self.case_params.is_valid(): + log.error(f"case_params {self.case_params} is invalid") + assert False + return True + + +conf = CaseConf() diff --git a/evaluation/eval-runner/eval-runner/common/const.py b/evaluation/eval-runner/eval-runner/common/const.py new file mode 100644 index 0000000000000000000000000000000000000000..16f8d393da8bd7192aa7f69bae951b7bbf43ae78 --- /dev/null +++ 
b/evaluation/eval-runner/eval-runner/common/const.py @@ -0,0 +1,12 @@ +LOCAL_PATH = "/root/eval-runner" +RUNNER_CONFIG_PATH = "/etc/eval-runner.yaml" + +TELEGRAF_TABLE_NAME_IN_INFLUX = "telegraf" + +API_PREFIX_RESULT_LOG = "/v1/evaluation/result/log" +API_PREFIX_RESULT_ZIP = "/v1/evaluation/result/zip" +CONTROLLER_HOST = "evaluation-controller" + + + + diff --git a/evaluation/eval-runner/eval-runner/common/module.py b/evaluation/eval-runner/eval-runner/common/module.py new file mode 100644 index 0000000000000000000000000000000000000000..34f4182868804bd0e4f8e23bded02275e67d5cb4 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/common/module.py @@ -0,0 +1,6 @@ +from eval_lib.model.base import BaseStruct + + +class AgentMeta(BaseStruct): + + KEYS = ["agent_ip", "version", "ssh_port", "ssh_username", "ssh_password"] diff --git a/evaluation/eval-runner/eval-runner/common/results.py b/evaluation/eval-runner/eval-runner/common/results.py new file mode 100644 index 0000000000000000000000000000000000000000..a597e17216b5613753dcd6ba84bc7975ea438c59 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/common/results.py @@ -0,0 +1,52 @@ +import yaml +from common.config import conf +class ResultsBase: + pass + + +class AgentResults(ResultsBase): + + def __init__(self, case_name): + self.case_name = case_name + # results dir + self.dir_path = f"{conf.runner_data_dir}/runner-{conf.case_params.uuid}/report" + self.data_dict = {"case_name": case_name} + + def add_result_data(self, data: dict, index: int=0): + modified_dict = {f"{self.case_name}." + key.replace("-", "_") + f".{index}": value for key, value in data.items()} + self.data_dict.update(modified_dict) + + def add_case_info(self, info): + self.data_dict.update(info) + + def generate_yaml_file(self): + if len(self.data_dict) > 1: + yaml_data = yaml.dump(self.format_data(self.data_dict)) + with open(f"{self.dir_path}/{self.case_name}.yaml", "w") as f: + f.write(yaml_data) + + @staticmethod + def format_data(data): + + def merge_dict(dict1, dict2): + result = {} + for key in set(dict1.keys()) | set(dict2.keys()): + if key in dict1 and key in dict2 and isinstance( + dict1[key], dict + ) and isinstance(dict2[key], dict): + result[key] = merge_dict(dict1[key], dict2[key]) + else: + result[key] = dict2.get(key, dict1.get(key)) + return result + + result = {} + for key, value in data.items(): + point = {} + parts = key.split(".") + for i in range(len(parts) - 1, -1, -1): + if i == len(parts) - 1: + point[parts[i]] = value + else: + point = {parts[i]: point} + result = merge_dict(result, point) + return result diff --git a/evaluation/eval-runner/eval-runner/common/utils.py b/evaluation/eval-runner/eval-runner/common/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..44a81e81fd60889795dafa4d6f4b6969ace82403 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/common/utils.py @@ -0,0 +1,438 @@ +import allure +import os +import time +import re +import zipfile + +from scp import SCPClient + +from eval_lib.common.logger import get_logger +from eval_lib.common.ssh import SSHPool +from eval_lib.databases.redis import runner_info +from eval_lib.databases.redis import const as redis_const +from common.module import AgentMeta +from common.config import conf +from platform_tools.aliyun.aliyun_sdk import Aliyun +from platform_tools.base import PlatformBase +from agent_tools.deepflow_agent.deepflow_agent import DeeepflowAgent +from agent_tools.base import AgentBase + +ssh_pool_default = SSHPool( + conf.global_ssh_port, + 
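+    # Shared module-level SSH pool; the port and credentials are the global_ssh_* values parsed from /etc/eval-runner.yaml in common/config.py.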
conf.global_ssh_username, + conf.global_ssh_password, +) +redis_db = runner_info.RedisRunnerInfo( + host=conf.redis_host, port=conf.redis_port, password=conf.redis_password, + db=conf.redis_db, max_connections=10 +) +log = get_logger() + +def get_case_uuid(): + return conf.case_params.uuid[:8] + +def step(title): + """ + 执行一个步骤,并根据Redis中存储的运行状态来决定步骤的执行流程。 + + :param title: 步骤的标题,用于日志记录和报告。 + :return: 执行allure步骤后的结果。 + """ + log.info(title) # 记录步骤开始的日志 + while True: + # 从Redis获取运行者信息 + runner_info_dict = redis_db.get_runner_info(uuid=conf.case_params.uuid) + log.info(runner_info_dict) + case_status = runner_info_dict.get("case-status", None) + case_control_status = runner_info_dict.get("case-control-status", None) + # 检查是否需要主动暂停用例 + if case_control_status == redis_const.CASE_STATUS_PAUSED: + log.info(f"case pause proactively") + # 如果当前状态不是暂停状态,则更新状态为暂停 + if case_status != redis_const.CASE_STATUS_PAUSED: + redis_db.update_runner_info( + uuid=conf.case_params.uuid, + info={"case-status": redis_const.CASE_STATUS_PAUSED} + ) + case_status = redis_const.CASE_STATUS_PAUSED + + # 检查是否需要主动取消用例 + elif case_control_status == redis_const.CASE_STATUS_CANCELLED: + log.info(f"case cancel proactively") + # 如果当前状态不是取消状态,则更新状态为取消 + if case_status != redis_const.CASE_STATUS_CANCELLED: + redis_db.update_runner_info( + uuid=conf.case_params.uuid, + info={"case-status": redis_const.CASE_STATUS_CANCELLED} + ) + case_status = redis_const.CASE_STATUS_CANCELLED + log.info("case cancel success") + assert False + + elif case_control_status == redis_const.CASE_STATUS_RUNNING: + # 如果当前状态不是运行状态,则更新状态为运行 + if case_status != redis_const.CASE_STATUS_RUNNING: + redis_db.update_runner_info( + uuid=conf.case_params.uuid, + info={"case-status": redis_const.CASE_STATUS_RUNNING} + ) + case_status = redis_const.CASE_STATUS_RUNNING + + # 如果用例状态不是运行中,则每隔20秒检查一次;如果是,则结束循环 + if case_status != redis_const.CASE_STATUS_RUNNING: + time.sleep(20) + else: + break + # 执行allure步骤,并返回结果 + return allure.step(title) + + +def choose_platform() -> PlatformBase: + platform_type = conf.platform_tools.get("type", "") + if platform_type == 'aliyun': + aliyun_info = conf.platform_tools.get("aliyun", {}) + if 'ALICLOUD_ACCESS_KEY' not in os.environ: + os.environ['ALICLOUD_ACCESS_KEY'] = aliyun_info['access_key'] + + if 'ALICLOUD_SECRET_KEY' not in os.environ: + os.environ['ALICLOUD_SECRET_KEY'] = aliyun_info['secret_key'] + + if 'ALICLOUD_REGION' not in os.environ: + os.environ['ALICLOUD_REGION'] = aliyun_info['region'] + return Aliyun + else: + # 如果没有选择有效的平台,则记录错误并返回 None + log.error("Invalid platform type specified.") + return None + + +def choose_agent() -> AgentBase: + agent_type = conf.case_params.agent_type + if agent_type == 'deepflowce': + return DeeepflowAgent + else: + # 如果没有选择有效的 agent,则记录错误并返回 None + log.error("Invalid agent type specified.") + assert False + + +def get_meta_data(agent_ip): + agent_type = conf.case_params.agent_type + agent_conf = conf.agent_tools.get(agent_type) + agent_version = agent_conf['version'] + agent_meta = AgentMeta() + agent_meta.ssh_port = conf.global_ssh_port + agent_meta.ssh_password = conf.global_ssh_password + agent_meta.ssh_username = conf.global_ssh_username + agent_meta.agent_ip = agent_ip + agent_meta.version = agent_version + return agent_meta + + +def get_fixed_host_ip(instance_name): + host_ips: dict = conf.fixed_host + ip = "" + if "traffic" in instance_name: + ip = host_ips.get("performance_analysis_traffic_ip", "") + elif "nginx" in instance_name and 'agent' in instance_name: + ip = 
host_ips.get("performance_analysis_nginx_ip", "") + elif "istio" in instance_name and 'agent' in instance_name: + ip = host_ips.get("performance_analysis_istio_ip", "") + return ip + + +def install_unzip(vm_ip, ssh_pool: SSHPool = ssh_pool_default): + """ + 通过SSH在指定的虚拟机上安装unzip工具。 + + 参数: + - vm_ip: 要安装unzip工具的虚拟机IP地址。 + - ssh_pool: SSH连接池,用于管理SSH连接。默认为ssh_pool_default。 + + 返回值: + 无返回值。 + """ + # 从SSH连接池获取指定IP的SSH客户端 + ssh_client = ssh_pool.get(vm_ip) + # 检查unzip是否已经安装 + check_command = 'which unzip' + _, stdout, _ = ssh_client.exec_command(check_command) + # 如果已安装,则记录日志并返回 + if stdout.channel.recv_exit_status() == 0: + log.info('Unzip already installed on') + return + # 获取系统信息,用于后续根据系统类型安装unzip + system_name, _ = get_system_info(vm_ip, ssh_pool) + # 根据系统类型选择安装命令 + if 'CentOS' in system_name or 'Alibaba' in system_name: + install_command = 'sudo yum install -y unzip' + elif 'Ubuntu' in system_name or 'Debian' in system_name: + install_command = 'sudo apt-get install -y unzip' + elif 'Amolis' in system_name: + install_command = 'sudo dnf install -y unzip' + else: + # 如果系统不受支持,则记录错误日志并返回 + log.error(f'Unsupported system: {system_name}') + return + # 执行安装命令 + _, stdout, stderr = ssh_client.exec_command(install_command) + # 获取命令执行状态,并根据状态记录成功或失败的日志 + exit_status = stdout.channel.recv_exit_status() + if exit_status == 0: + log.info('Unzip installed successfully on') + else: + log.error( + f'Failed to install unzip, error log:{stderr.read().decode()}' + ) + + +def get_system_info(vm_ip, ssh_pool: SSHPool = ssh_pool_default) -> tuple: + """ + 通过SSH连接到指定的虚拟机IP地址,获取操作系统的名称和版本信息。 + + 参数: + vm_ip (str): 要连接的虚拟机的IP地址。 + ssh_pool (SSHPool): SSH连接池,默认为ssh_pool_default。用于管理SSH连接。 + + 返回: + tuple: 包含操作系统的名称和版本的元组。(name, version) + """ + # 从SSH连接池中获取指定IP的SSH客户端 + ssh_client = ssh_pool.get(vm_ip) + # 在虚拟机上执行命令,获取操作系统名称和版本的信息 + _, stdout, stderr = ssh_client.exec_command( + "cat /etc/os-release | grep -E '^NAME=|^VERSION='" + ) + name = "" + version = "" + # 解析命令输出,提取操作系统名称和版本 + for line in stdout: + if line.startswith('NAME='): + name = line.split('=')[1].strip() + elif line.startswith('VERSION='): + version = line.split('=')[1].strip() + # 如果未能成功提取名称或版本,记录警告日志 + if not name or not version: + log.warning( + 'Failed to get system info, error log:', + stderr.read().decode() + ) + # 返回操作系统名称和版本 + return name, version + + +def upload_files( + vm_ip, local_path, remote_path, ssh_pool: SSHPool = ssh_pool_default +) -> bool: + ''' + 上传文件到远程服务器。 + + 参数: + - vm_ip: 远程服务器的IP地址。 + - local_path: 本地文件或目录的路径。 + - remote_path: 远程服务器上文件或目录的目标路径(目录必须存在)。 + - ssh_pool: SSH连接池,默认使用 ssh_pool_default。 + + 返回值: + - bool: 上传成功返回True,失败返回False。 + ''' + # 从SSH连接池获取SSH客户端 + ssh_client = ssh_pool.get(vm_ip) + try: + # 使用SCPClient上传文件或目录 + with SCPClient( + ssh_client.get_transport(), socket_timeout=15.0 + ) as scpclient: + # 判断本地路径是文件还是目录,并分别处理 + if os.path.isfile(local_path): + # 上传文件 + scpclient.put(local_path, remote_path) + log.info(f'Upload file success: {local_path} -> {remote_path}') + return True + elif os.path.isdir(local_path): + # 上传目录下所有文件 + files_uploaded = 0 + for filename in os.listdir(local_path): + file_path = os.path.join(local_path, filename) + if os.path.isfile(file_path): + scpclient.put(file_path, remote_path) + log.info( + f'Upload file success: {file_path} -> {remote_path}' + ) + files_uploaded += 1 + # 记录上传文件总数 + log.info( + f'Total {files_uploaded} files uploaded from {local_path} to {remote_path}' + ) + return True + else: + # 处理本地路径无效的情况 + log.error(f'Invalid local path: {local_path}') + return False + 
    except FileNotFoundError:
+        # the local file or directory does not exist
+        log.error(f'Local file or directory not found: {local_path}')
+        return False
+    except Exception as e:
+        # any other error during the upload
+        log.error(f'Upload file error: {e}')
+        return False
+
+
+def check_ssh_command(vm_ip, command, ssh_pool: SSHPool = ssh_pool_default) -> bool:
+    ssh_client = ssh_pool.get(vm_ip)
+    _, _, stderr = ssh_client.exec_command(command)
+    err = stderr.read().decode()
+    if "not found" in err or "command not found" in err:
+        log.info(f"Command '{command}' not found")
+        return False
+    else:
+        log.info(f"Command '{command}' found")
+        return True
+
+
+def check_helm_chart(vm_ip, chart_name, namespace, ssh_pool: SSHPool = ssh_pool_default) -> bool:
+    ssh_client = ssh_pool.get(vm_ip)
+    command = f"sudo helm list --short -n {namespace}"
+    _, stdout, stderr = ssh_client.exec_command(command)
+    output = stdout.read().decode()
+    error = stderr.read().decode()
+    if error:
+        log.error(f"Error executing helm command {command}: {error}")
+    if chart_name in output:
+        log.info(f"Chart '{chart_name}' found in namespace '{namespace}'")
+        return True
+    else:
+        log.info(f"Chart '{chart_name}' not found in namespace '{namespace}'")
+        return False
+
+def install_k8s(vm_ip, ssh_pool: SSHPool = ssh_pool_default):
+    """
+    Install a Kubernetes cluster.
+
+    Connects to the given VM over SSH, runs the Kubernetes installation command, and monitors the installation until the cluster reaches the Ready state.
+
+    Parameters:
+    - vm_ip: IP address of the VM on which Kubernetes is installed.
+    - ssh_pool: SSH connection pool, defaults to ssh_pool_default.
+
+    Returns:
+    None. Logs a message on success and raises an assertion error on failure.
+    """
+    ssh_client = ssh_pool.get(vm_ip)  # get an SSH client from the pool
+    log.info(f"check if k8s is installed")
+    _, stdout, stderr = ssh_client.exec_command(
+        "sudo kubectl get nodes"
+    )  # query the node status
+    exit_status = stdout.channel.recv_exit_status()
+    if exit_status == 0:
+        log.info("k8s installation completed")
+        return
+    else:
+        log.error(
+            f'k8s is not installed, will install k8s'
+        )  # k8s is not installed yet, install it below
+    try:
+        # build the command that installs Kubernetes and Calico
+        cmd_install = '''sudo sealos run localhost/labring/kubernetes:v1.25.0 localhost/calico:v3.24.1 --single && \
+        sudo kubectl taint node node-role.kubernetes.io/control-plane- --all'''
+        log.info(f'install k8s, cmd: {cmd_install}')
+        _, stdout, stderr = ssh_client.exec_command(cmd_install)  # run the installation command
+        exit_status = stdout.channel.recv_exit_status()  # exit status of the command
+        if exit_status == 0:
+            log.info(f'{cmd_install} exec successful')
+        else:
+            # log an error if the command failed
+            log.error(
+                f'abnormal installation of k8s, error: {stderr.read().decode()}'
+            )
+
+        # poll the Kubernetes node status until all nodes are Ready
+        for _ in range(30):
+            cmd_get_nodes = "sudo kubectl get nodes"
+            log.info(f"check the status of k8s, cmd: {cmd_get_nodes}")
+            _, stdout, stderr = ssh_client.exec_command(
+                cmd_get_nodes
+            )  # query the node status
+            output = stdout.read().decode()  # command output
+            error = stderr.read().decode()  # error output
+            if re.search(r"\bReady\b", output):  # look for the "Ready" keyword in the output
+                log.info("k8s installation completed")
+                break
+            # nodes not ready yet: log and retry after a short wait
+            log.error(
+                f'wait k8s ready: info: {output}, wait about 5s, err: {error}'
+            )
+            time.sleep(5)
+        else:
+            # the cluster is still not Ready after 30 checks, fail the case
+            assert False
+    except Exception as err:
+        # log and fail on any exception raised during installation
+        log.error(
+            'install kubernetes unsuccessful or the cluster status is abnormal, err: {}'
+            .format(err)
+        )
+        assert False
+
+
+def ensure_process_running(
+    vm_ip, process_name, ssh_pool: SSHPool = ssh_pool_default
+):
+    """
+    Ensure that the specified process is running on the remote VM.
+
+    Parameters:
+    - vm_ip: IP address of the remote VM.
+    - process_name: name of the process that must be running.
+    - ssh_pool: SSH connection pool used to reach the VM, defaults to ssh_pool_default.
+
+    Returns:
+    None. Logs a message if the process starts and runs successfully; otherwise logs an error and raises an assertion error.
+    """
+    # 
通过SSH连接池获取远程虚拟机的SSH客户端 + ssh_client = ssh_pool.get(vm_ip) + # 构造启动并检查进程状态的命令 + check_cmd = f'sudo systemctl start {process_name} && sudo systemctl status {process_name}' + # 执行命令,获取执行结果 + _, stdout, stderr = ssh_client.exec_command(check_cmd) + # 读取并解码标准输出的内容 + output = stdout.read().decode() + # 检查进程是否成功启动并运行 + if "Active: active (running)" in output: + log.info(f"{process_name} successfully started and is running") + else: + # 如果进程启动失败,记录错误日志并抛出断言错误 + log.error( + f"{process_name} start failed, err: {stderr.read().decode()}" + ) + assert False + + +def zip_dir(folder_path, output_path): + """ + 将指定文件夹压缩成ZIP文件。 + + :param folder_path: 需要压缩的文件夹路径。 + :param output_path: 压缩文件输出的路径。 + """ + # 创建一个ZIP文件对象,准备写入压缩文件 + with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + # 遍历folder_path下的所有文件和子文件夹 + for root, dirs, files in os.walk(folder_path): + # 遍历子文件夹,并将其添加到压缩文件中 + for dir in dirs: + dir_path = os.path.join(root, dir) + # 将目录添加到压缩文件,使用相对路径 + zipf.write( + dir_path, + os.path.relpath(dir_path, os.path.dirname(folder_path)) + ) + # 遍历文件,并将其添加到压缩文件中 + for file in files: + file_path = os.path.join(root, file) + # 将文件添加到压缩文件,使用相对路径 + zipf.write( + file_path, + os.path.relpath(file_path, os.path.dirname(folder_path)) + ) diff --git a/evaluation/eval-runner/eval-runner/eval-runner.py b/evaluation/eval-runner/eval-runner/eval-runner.py new file mode 100644 index 0000000000000000000000000000000000000000..53e1f3d0b5c65d6493ef1d48f300fe7701fc010f --- /dev/null +++ b/evaluation/eval-runner/eval-runner/eval-runner.py @@ -0,0 +1,206 @@ +import time +import os +import subprocess +import sys +import shutil + +from common import const +from common.config import conf +from eval_lib.common.logger import get_logger +from eval_lib.common.logger import LoggerManager +from common.utils import redis_db +from common.utils import zip_dir +from eval_lib.databases.redis import const as redis_const +from eval_lib.source.dictonary import Dictionary +from common.client import ResultClient, LogClient + +log = get_logger() + + +class Runner(): + + def __init__(self): + self.uuid = conf.case_params.uuid + self.case_params = conf.case_params + self.start_time = int(time.time()) + self.pytest_process: subprocess.Popen = None + + self.runner_dir = const.LOCAL_PATH + self.runner_data_path = f"{conf.runner_data_dir}/runner-{self.uuid}" + self.runner_report_path = f"{self.runner_data_path}/report" + self.runner_log_path = f"{self.runner_data_path}/log" + self.runner_allure_path = f"{self.runner_data_path}/allure-result" + + def get_case_path(self): + case_path = Dictionary().CASE_DICTIONARY.get( + self.case_params.case_name + ) + if not case_path: + log.error(f"case_name: {self.case_params.case_name} not support") + raise Exception( + f"case_name: {self.case_params.case_name} not support" + ) + return case_path[0] + + def run(self): + try: + self.init_env() + self.exec_pytest() + self.wait() + # self.get_results() + except Exception as e: + log.error(f"Runner {self.uuid} run error: {e}") + finally: + self.push_results() + redis_db.update_runner_info( + uuid=self.uuid, + info={"runner-status": redis_const.CASE_STATUS_COMPLETED} + ) + time.sleep(300) + + def init_env(self): + """初始化环境目录 + """ + # 创建数据目录 + log.info(f"data_dir is : {self.runner_data_path}") + folder_paths = [ + conf.runner_data_dir, + self.runner_data_path, + self.runner_report_path, + self.runner_log_path, + self.runner_allure_path, + ] + for folder_path in folder_paths: + try: + os.makedirs(folder_path) + log.info(f"Runner {self.uuid} create 
folder: {folder_path}") + except FileExistsError: + pass + log.info(f"Runner {self.uuid} init env success.") + + def exec_pytest(self): + # 执行测试用例 + envs = os.environ.copy() + envs["PYTHONPATH"] = f":{self.runner_dir}" + # TODO: leyi 修改log文件 + log_path = f"{self.runner_log_path}/pytest-{self.uuid}.log" + + try: + command = f"pytest -vs ./case/{self.get_case_path()} --alluredir {self.runner_allure_path} --workers {self.case_params.process_num} > {log_path}" + # 执行 pytest 命令 + log.info(f"exec pytest command: {command}") + self.pytest_process = subprocess.Popen( + command, + shell=True, + cwd=self.runner_dir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=envs, + ) + except subprocess.CalledProcessError as e: + log.error("exec pytest error:", e) + + redis_db.update_runner_info( + uuid=self.uuid, info={ + "runner-status": redis_const.CASE_STATUS_RUNNING, + "case-status": redis_const.CASE_STATUS_RUNNING + } + ) + + def wait(self): + log_path = f"{self.runner_log_path}/pytest-{self.uuid}.log" + lc = self.start_forward_log(log_path=log_path) + while True: + # 检查进程状态 + time.sleep(5) + # pytest 进程结束了 + if self.pytest_process.poll() is not None: + if self.pytest_process.returncode == 0: + log.info("pytest process has finished.") + else: + _, pytest_stderr = self.pytest_process.communicate() + log.error("pytest process occurred error") + if pytest_stderr is not None: + log.error(f"error_log: {pytest_stderr.decode()}") + lc.stop() + redis_db.update_runner_info( + uuid=self.uuid, + info={"case-status": redis_const.CASE_STATUS_COMPLETED} + ) + break + runner_info_dict = redis_db.get_runner_info(uuid=self.uuid) + if runner_info_dict["case-control-status" + ] == redis_const.CASE_STATUS_FORCE_END: + # 主动取消case执行 + redis_db.update_runner_info( + uuid=self.uuid, + info={"case-status": redis_const.CASE_STATUS_FORCE_END} + ) + log.error("case force end") + self.interrupt() + lc.stop() + break + + def interrupt(self): + self.pytest_process.kill() + log.error(f"Runner {self.uuid} interrupt.") + + def start_forward_log(self, log_path): + log.info("start log forwarding") + server_url = f"http://{const.CONTROLLER_HOST}:{conf.listen_port}{const.API_PREFIX_RESULT_LOG}" + lc = LogClient( + uuid=self.uuid, log_file=log_path, server_url=server_url + ) + lc.setDaemon(True) + lc.start() + return lc + + def push_results(self): + log.info("start push result to controller") + runner_data_zip = f"runner-{self.uuid}.zip" + shutil.move( + src=f"{conf.runner_data_dir}/runner.log", + dst=f"{self.runner_log_path}/runner.log" + ) + zip_dir(folder_path=self.runner_data_path, output_path=runner_data_zip) + server_url = f"http://{const.CONTROLLER_HOST}:{conf.listen_port}{const.API_PREFIX_RESULT_ZIP}" + rc = ResultClient(server_url=server_url) + rc.send_result_zip(zip_file_path=runner_data_zip) + log.info(f"Runner {self.uuid} push results.") + + def get_results(self): + # TODO: luyao 收集测试结果 + log.info("start gengerate allure result") + allure_tmp_dir = "allure-report" + command = f"allure generate -c {self.runner_allure_path}/ -o {allure_tmp_dir}" + try: + process = subprocess.run( + command, + shell=True, + cwd=self.runner_dir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if process.returncode == 0: + log.info("allure-report file gengerate successful") + else: + log.error( + f"allure-report file gengerate failed: error: {process.stderr.decode()}" + ) + return False + except subprocess.CalledProcessError: + log.error("allure generate error, cmd error") + return False + zip_dir( + folder_path="allure-report", + 
output_path=f"{self.runner_allure_path}/allure-report.zip" + ) + + +if __name__ == '__main__': + if not conf.is_valid(): + print('Invalid conf value, error exit.') + sys.exit(1) + # TODO: 初始化log文件 + LoggerManager(log_file=f"{conf.runner_data_dir}/runner.log") + Runner().run() diff --git a/evaluation/eval-runner/eval-runner/eval_lib b/evaluation/eval-runner/eval-runner/eval_lib new file mode 120000 index 0000000000000000000000000000000000000000..ccd87bf41b8a393a87af84e50d361366dcb59b77 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/eval_lib @@ -0,0 +1 @@ +../../eval-lib \ No newline at end of file diff --git a/evaluation/eval-runner/eval-runner/platform_tools/__init__.py b/evaluation/eval-runner/eval-runner/platform_tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/evaluation/eval-runner/eval-runner/platform_tools/aliyun/ali_const.py b/evaluation/eval-runner/eval-runner/platform_tools/aliyun/ali_const.py new file mode 100644 index 0000000000000000000000000000000000000000..cdfa77914fc4317df22f3c999cb411b64bc27131 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/platform_tools/aliyun/ali_const.py @@ -0,0 +1,14 @@ +# ------ Aliyun Public Cloud------------ +# Common Variable Definition +ali_resource_group_id_default = 'rg-aekzm564q2edrsi' +ali_security_group_id_default = 'sg-2zegvk5suvkrfo6qcd6r' +ali_v_switch_id_beijing_a = 'vsw-2zesbggx0gxti4bs53qga' +ali_v_switch_id_beijing_k = 'vsw-2zeh3cgrhmn0kmhudu1wl' +ali_zone_id_beijing_a = 'cn-beijing-a' +ali_zone_id_beijing_k = 'cn-beijing-k' +ali_key_pair_name_default = 'automation' +ali_instance_type_c6_2x_large = 'ecs.c6.2xlarge' +ali_instance_type_c6r_2x_large = 'ecs.c6r.2xlarge' +ali_image_id_x86_centos = 'm-2zec520yiix6ihla0r7b' +ali_image_id_arm = 'm-2ze8315uz1wvw8tshrv3' +ali_image_id_performance_analysis = "m-2ze04udh1zzjc6813fep" diff --git a/evaluation/eval-runner/eval-runner/platform_tools/aliyun/aliyun_sdk.py b/evaluation/eval-runner/eval-runner/platform_tools/aliyun/aliyun_sdk.py new file mode 100644 index 0000000000000000000000000000000000000000..dd89b38269c04711fb85bb6536f6a4641750df4e --- /dev/null +++ b/evaluation/eval-runner/eval-runner/platform_tools/aliyun/aliyun_sdk.py @@ -0,0 +1,284 @@ +import os +from platform_tools.aliyun import ali_const +from common.config import conf +from typing import List +from platform_tools.base import PlatformBase +from Tea.core import TeaCore +from alibabacloud_tea_util import models as util_models +from alibabacloud_tea_openapi import models as open_api_models +from alibabacloud_tea_util.client import Client as UtilClient +from alibabacloud_ecs20140526 import models as ecs_models +from alibabacloud_ecs20140526.client import Client as EcsClient +from alibabacloud_darabonba_number.client import Client as NumberClient + +from eval_lib.common.logger import get_logger + +log = get_logger() + + +class Aliyun(PlatformBase): + + def __init__(self) -> None: + super().__init__() + + @staticmethod + def create_client() -> EcsClient: + """ + 使用AK&SK初始化账号Client + @param access_key_id: + @param access_key_secret: + @return: Client + """ + config = open_api_models.Config( + # 必填,请确保代码运行环境设置了环境变量 ALICLOUD_ACCESS_KEY. , + access_key_id=os.environ['ALICLOUD_ACCESS_KEY'], + # 必填,请确保代码运行环境设置了环境变量 ALICLOUD_SECRET_KEY. , + access_key_secret=os.environ['ALICLOUD_SECRET_KEY'], + # 必填,请确保代码运行环境设置了环境变量 ALICLOUD_REGION. 
, + region_id=os.environ['ALICLOUD_REGION'], + ) + return EcsClient(config) + + @staticmethod + def _start_instances( + client: EcsClient, + region_id: str, + instance_ids: List[str], + dry_run: bool = False, + ) -> None: + """ + [批量] 实例开机-> None + """ + request = ecs_models.StartInstancesRequest( + dry_run=dry_run, region_id=region_id, instance_id=instance_ids + ) + responce = client.start_instances(request) + log.info( + f'start instance: {instance_ids}, Successfully. result: {UtilClient.to_jsonstring(TeaCore.to_map(responce.body))}' + ) + + @staticmethod + def _stop_instances( + client: EcsClient, + region_id: str, + instance_ids: List[str], + stopped_mode: str = 'KeepCharging', + dry_run: bool = False, + ) -> None: + """ + [批量] 实例关机-> None + """ + request = ecs_models.StopInstancesRequest( + region_id=region_id, + instance_id=instance_ids, + stopped_mode=stopped_mode, + dry_run=dry_run, + ) + runtime = util_models.RuntimeOptions() + responce = client.stop_instances_with_options(request, runtime) + log.info( + f'stop instance: {instance_ids}, successful. result:{UtilClient.to_jsonstring(TeaCore.to_map(responce.body))}' + ) + + @staticmethod + def _create_instances( + client: EcsClient, + image_id: str, + instance_name: str, + region_id: str, + instance_type: str, + security_group_id: str, + v_switch_id: str, + resource_group_id: str, + password: str, + zone_id: str, + key_pair_name: str, + amount: int, + )-> List[str]: + """ + [批量] 实例创建-> str: 实例id + """ + tag_0 = ecs_models.RunInstancesRequestTag( + key='财务单元', + value='自动化测试' + ) + request = ecs_models.RunInstancesRequest( + region_id=region_id, + instance_name=instance_name, + image_id=image_id, + instance_type=instance_type, + security_group_id=security_group_id, + v_switch_id=v_switch_id, + resource_group_id=resource_group_id, + password=password, + zone_id=zone_id, + key_pair_name=key_pair_name, + amount=amount, + tag=[tag_0], + ) + runtime = util_models.RuntimeOptions() + response = client.run_instances_with_options(request, runtime) + instance_ids = UtilClient.to_jsonstring( + response.body.instance_id_sets.instance_id_set + ) + UtilClient.sleep(40000) + log.info( + f'-----------create instance successful, instance ID:{instance_ids}--------------' + ) + return instance_ids + + @staticmethod + def _delete_instances( + client: EcsClient, + region_id: str, + instance_ids: List[str], + force: bool = False, + ) -> None: + """ + [批量] 实例删除-> None + """ + Aliyun._stop_instances(client, region_id, instance_ids) + Aliyun._await_instances_status( + client, region_id, instance_ids, "Stopped" + ) + request = ecs_models.DeleteInstancesRequest( + region_id=region_id, + instance_id=instance_ids, + force=force, + ) + runtime = util_models.RuntimeOptions() + response = client.delete_instances_with_options(request, runtime) + log.info( + '--------------------instance delete successful--------------------' + ) + log.info(UtilClient.to_jsonstring(UtilClient.to_map(response))) + + @staticmethod + def _await_instances_status( + client: EcsClient, + region_id: str, + instance_ids: List[str], + expect_instance_status: str, + ) -> bool: + """ + [批量] 等待实例状态为特定的状态, 默认等待20s,超过20s返回false,否则返回true。 + """ + time = 0 + flag = True + while flag and NumberClient.lt(time, 10): + flag = False + instances_info= Aliyun._get_instances_info( + client, region_id, instance_ids + ) + for instance in instances_info: + instance_status = instance["status"] + log.info( + f'instance: {instance["instanceid"]}, status: {instance_status}' + ) + if not UtilClient.equal_string( + 
instance_status, expect_instance_status + ): + UtilClient.sleep(3000) + flag = True + time = NumberClient.add(time, 1) + return NumberClient.lt(time, 10) + + @staticmethod + def _get_instances_info( + client: EcsClient, region_id: str, instance_ids: List[str] + ) -> List[dict]: + """ + [批量] 获取实例信息 + 返回字典列表[{"instanceid","ip":"","status":""},] + """ + instance_info = [] + request = ecs_models.DescribeInstancesRequest( + region_id=region_id, instance_ids=str(instance_ids) + ) + runtime = util_models.RuntimeOptions() + response = client.describe_instances_with_options(request, runtime) + instance_data = response.body.instances.instance + for instance in instance_data: + instance_info.append( + { + "instanceid": instance.instance_id, + "ip": instance.vpc_attributes.private_ip_address.ip_address[0], + "status": instance.status, + } + ) + return instance_info + + @staticmethod + def _get_instance_id_by_name( + client: EcsClient, + region_id: str, + instance_name: str + ) -> str: + describe_instances_request = ecs_models.DescribeInstancesRequest( + region_id=region_id, + instance_name=instance_name + ) + runtime = util_models.RuntimeOptions() + response = client.describe_instances_with_options(describe_instances_request, runtime) + if response.body.instances.instance: + return response.body.instances.instance[0].instance_id + else: + return "" + + @staticmethod + def create_instances( + instance_names: list, + image_id=ali_const.ali_image_id_x86_centos, + instance_type=ali_const.ali_instance_type_c6_2x_large, + ) -> dict: + '''创建通用镜像的实例 + 密码固定为CASE_SSH_PASSWORD_DEFAULT + ''' + client = Aliyun.create_client() + region_id=os.environ['ALICLOUD_REGION'] + instances_ip = {} + for instance_name in instance_names: + instance_ids = Aliyun._create_instances( + client=client, + instance_name=instance_name, + image_id=image_id, + instance_type=instance_type, + region_id=region_id, + security_group_id=ali_const.ali_security_group_id_default, + v_switch_id=ali_const.ali_v_switch_id_beijing_a, + resource_group_id=ali_const.ali_resource_group_id_default, + password=conf.global_ssh_password, + zone_id=ali_const.ali_zone_id_beijing_a, + key_pair_name=ali_const.ali_key_pair_name_default, + amount=1, + ) + Aliyun._await_instances_status( + client, region_id, instance_ids, "Running" + ) + instances_info = Aliyun._get_instances_info( + client=client, + region_id=region_id, + instance_ids=instance_ids, + ) + instances_ip[instance_name] = instances_info[0]["ip"] + log.info(f"create instances successful: {instances_ip}") + return instances_ip + + @staticmethod + def delete_instances(instance_names: list): + instance_ids = [] + client = Aliyun.create_client() + region_id=os.environ['ALICLOUD_REGION'] + for instance_name in instance_names: + instance_id = Aliyun._get_instance_id_by_name( + client=client, + region_id=region_id, + instance_name=instance_name + ) + if instance_id: + instance_ids.append(instance_id) + Aliyun._delete_instances( + client=client, + region_id=region_id, + instance_ids=instance_ids, + ) \ No newline at end of file diff --git a/evaluation/eval-runner/eval-runner/platform_tools/base.py b/evaluation/eval-runner/eval-runner/platform_tools/base.py new file mode 100644 index 0000000000000000000000000000000000000000..b3b162f49a65b29db3edc38c7a8de05818f810ef --- /dev/null +++ b/evaluation/eval-runner/eval-runner/platform_tools/base.py @@ -0,0 +1,37 @@ + + +class PlatformBase(): + def __init__(self) -> None: + pass + + @staticmethod + def create_instances(instance_names: list, image_id="", 
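`Aliyun.create_instances` and `Aliyun.delete_instances` are the public entry points; the other methods are private building blocks. A minimal lifecycle sketch, assuming the Aliyun environment variables above are exported and that the instance names are hypothetical:

```python
from platform_tools.aliyun.aliyun_sdk import Aliyun

# hypothetical instance names; real cases derive them from the test template
names = ["eval-agent-node-1", "eval-traffic-node-1"]

try:
    instances_ip = Aliyun.create_instances(instance_names=names)  # {name: private ip}
    for name, ip in instances_ip.items():
        print(f"{name} -> {ip}")
    # ... run the test case against these IPs ...
finally:
    Aliyun.delete_instances(instance_names=names)  # always release the ECS resources
```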
instance_type="") -> dict: + '''创建实例 + return: + { + "instance_name": "ip" + } + ''' + pass + + @staticmethod + def delete_instances(instance_names: list): + '''删除实例 + ''' + pass + + @staticmethod + def start_instances(instance_names: list): + pass + + @staticmethod + def stop_instances(instance_names: list): + pass + + @staticmethod + def get_instance_status(instance_name: str): + pass + + @staticmethod + def get_instance_ip(instance_name: str): + pass diff --git a/evaluation/eval-runner/eval-runner/pytest.ini b/evaluation/eval-runner/eval-runner/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..d3fe78aae407ccaeedef09b8d1b14d5e7995ca70 --- /dev/null +++ b/evaluation/eval-runner/eval-runner/pytest.ini @@ -0,0 +1,11 @@ +[pytest] +addopts = -p no:warnings +log_cli=true +log_cli_level=DEBUG +log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) +log_cli_date_format=%Y-%m-%d %H:%M:%S +markers = + high + medium + low + cancel \ No newline at end of file diff --git a/evaluation/requirements.txt b/evaluation/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..82f94b9858aecb16f2300a73b72ff452465e4662 --- /dev/null +++ b/evaluation/requirements.txt @@ -0,0 +1,17 @@ +alibabacloud-ecs20140526==4.0.3 +allure-pytest==2.13.3 +pytest==7.0.1 +pytest-parallel==0.1.1 +pytest_multithreading_allure==1.0.8 +PyYAML==6.0.1 +six==1.16.0 +alibabacloud-darabonba-number==0.0.4 +paramiko==3.4.0 +scp==0.14.5 +Flask==2.0.3 +Werkzeug==2.0.3 +influxdb==5.3.2 +pymysql==1.0.2 +peewee==3.17.3 +redis==4.3.5 +