nomad编排容器
# vscode扩展
hashicorp.hcl
fredwangwang.vscode-hcl-format
# 推荐直接下载二进制运行
# https://releases.hashicorp.com/nomad/
wget https://releases.hashicorp.com/nomad/1.6.2/nomad_1.6.2_linux_amd64.zip
配置 consul
# consul 不是必须的: 单节点可以直接使用 nomad 自带的服务发现 (provider = "nomad", 模板中配合 range nomadService), 只有多节点才需要使用 consul
wget https://releases.hashicorp.com/consul/1.16.2/consul_1.16.2_linux_amd64.zip
https://developer.hashicorp.com/consul/docs/agent#configuring-consul-agents
# 提示异常: dial tcp 127.0.0.1:8500: connect: connection refused
# plan提示: Constraint ${attr.consul.version} semver >= 1.7.0 filtered 1 node
# 默认 consul 块将自动与所有 Nomad 代理配置合并, 如果在系统上检测到 Consul,这些合理的默认值会自动启用 Consul 集成. 节点发现等
# 出现以上异常是因为 job 使用了 consul (例如 provider = "consul"), 此时 consul [必须运行], 否则会通信异常
### 默认无密码
GET http://127.0.0.1:8500/v1/catalog/services
# ui界面
http://127.0.0.1:8500/ui/
# 配置文件
/etc/consul.hcl
# consul 的 acl: https://developer.hashicorp.com/consul/docs/security/acl/tokens
# Single-node Consul server agent configuration.

# Identity and role of this agent.
datacenter = "dc1"
node_name  = "consul-server"
server     = true
bootstrap  = true

# Must be the public / externally exposed IP, or the primary NIC's private
# address (an IP that shows up in `ip a`).
bind_addr = "172.26.156.175"

# HTTP API address.
addresses {
  http = "0.0.0.0"
}

data_dir  = "/consul/data"
log_level = "INFO"

# Web UI.
ui_config {
  enabled = true
}

connect {
  enabled = true
}

# ACLs are enabled with a default policy of "deny"; tokens are persisted.
acl = {
  enabled                  = true
  default_policy           = "deny"
  enable_token_persistence = true
}
# 注意 consul agent -dev 只能本地可以访问, 不能持久化数据, 正式环境使用 -server;
sudo consul agent -config-file=/etc/consul.hcl
# 初始化 ACL, 获取 bootstrap 管理 token (输出中的 SecretID); 该命令只能成功执行一次, 请妥善保存
consul acl bootstrap
export CONSUL_HTTP_TOKEN=ba1f15c5-474b-7a6b-0e5c-689470913096
# 查看节点
consul members
# 查看token列表, 需要创建一个Local: true的token
consul acl token list
# 创建
consul acl policy list
consul acl token create -policy-id global-management -local -description "my token" -node-identity "main:main"
consul acl token delete -accessor-id=89ea3fba-7727-14b0-1dd3-afa5fd8983ed
# AccessorID: 89ea3fba-7727-14b0-1dd3-afa5fd8983ed
# SecretID: 286285b7-b94b-286c-aed0-99f6071c951a
# Description: my token
# Local: true
# Create Time: 2023-10-11 13:51:53.849210887 +0800 CST
# Node Identities:
# main (Datacenter: main)
# consul 会根据服务注册时定义的 check 对服务进行健康检查
consul catalog datacenters
consul catalog services -tags
consul services deregister -id=xx
# 权限策略
consul acl policy list
# 策略创建/更新
# Permission denied: anonymous token lacks permission 'agent:read'
consul acl policy create -name "anonymous-readagent" -description "匿名读agent" -rules 'agent_prefix "" { policy = "read" }'
consul acl policy update -name "anonymous-readagent" -description "匿名读agent" -rules 'agent_prefix "" { policy = "read" }'
consul acl policy update -name "anonymous-readagent" -description "匿名读agent" \
-rules 'agent_prefix "" { policy = "read" }, service_prefix "" { policy = "write" }'
# 查看
consul acl policy read -name anonymous-readagent
# 给token增加策略
consul acl token update -accessor-id anonymous \
-policy-name "anonymous-readagent"
运行服务
# 版本
nomad --version
# Nomad v1.2.8
# 直接运行 nomad agent 而不指定模式或配置文件时会提示: Must specify either server, client or dev mode for the agent.
systemctl status nomad
# 实际运行的命令, 配置支持conf/json/hcl, 推荐hcl
sudo nomad agent -config=/etc/nomad.hcl
# 配置文档: https://developer.hashicorp.com/nomad/docs/configuration
# 起来server和client
# /etc/nomad.hcl
# Nomad agent configuration: one process acting as both server and client.
data_dir = "/var/lib/nomad"

# Consul endpoint and ACL token used by this agent.
# NOTE(review): the token is stored here in plain text — confirm this file's permissions.
consul {
  address = "127.0.0.1:8500"
  token   = "3354dd49-c77b-39c5-b9fb-cff01b21521d"
}

client {
  enabled = true
}

server {
  enabled          = true
  bootstrap_expect = 1
}

# Older Docker versions need privileged mode; the task must also set
# `privileged = true` in its own config block.
plugin "docker" {
  config {
    allow_privileged = true
  }
}
# 需要 配置 acl 授权验证
# 管理界面,默认端口4646, 会跟随api同时启动
nomad ui -show-url
# URL for web UI: http://127.0.0.1:4646
# 如果服务异常直接删除data_dir的数据, 比如提示Duplicate client-id
https://support.hashicorp.com/hc/en-us/articles/7922521461651-Duplicate-client-id
管理
# Client nodes (`nomad node status` replaces the deprecated `nomad node-status` spelling)
nomad node status
# ID DC Name Class Drain Eligibility Status
# e039952a dc1 master <none> false eligible ready
# Servers
nomad server members
# Name Address Port Status Leader Protocol Build Datacenter Region
# master.global 172.28.4.133 4648 alive true 2 1.2.8 dc1 global
# Job status list
nomad job status
# ID Type Priority Status Submit Date
# example service 50 running 2023-03-23T10:08:35+08:00
# httpserver service 50 running 2023-03-23T11:05:22+08:00
部署配置生成
# Deployments are described as jobs
# Docs: https://www.nomadproject.io/docs/job-specification/job
# Generate a sample job file (example.nomad)
nomad job init
# The sample uses driver = "docker"; it can be changed to podman
# Recommended: keep the service name and the task name identical
#
nomad job run example.nomad
nomad status example
# Logs / restart take an allocation ID followed by the task name
nomad alloc logs 883269bf redis
nomad alloc restart 883269bf redis
# Delete the job: the argument is the job name (example), not the task name (redis)
nomad job stop -purge example
# Update: preview the change, then apply it
nomad job plan nginx.nomad
nomad job run nginx.nomad
# Failed to pull `nginx:latest`: Get https://registry-1.docker.io/v2/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
# https://github.com/moby/moby/issues/22635#issuecomment-224708869
# 验证服务器是否可用
curl https://registry-1.docker.io/v2/
curl https://registry.jihulab.com/v2
cat /etc/resolv.conf
加入节点(servers)
# 注意节点的名称需要唯一
nomad server join [options] <addr> [<addr>...]
# 默认服务端口是4648
nomad server join 10.0.0.8:4648
job使用, 包含几个重要配置
# 通常是: job->group->n*task
# 分组
# https://developer.hashicorp.com/nomad/docs/job-specification/group
# 健康检查
# https://developer.hashicorp.com/nomad/docs/job-specification/check
# 自动缩放
# https://developer.hashicorp.com/nomad/docs/job-specification/scaling
# 更新策略
# https://developer.hashicorp.com/nomad/docs/job-specification/update
负载均衡 Load Balancer
# 参考 NGINX: https://developer.hashicorp.com/nomad/tutorials/load-balancing/load-balancing-nginx
# 注意如果版本太低,部分template模板语法不支持
# 当服务副本发生变化时, 模板中 nomadService 的数据会随之更新, 服务会重载
# 镜像使用带tag的镜像,否则默认的latest会每次都删除再拉,网络问题导致失败
template模板
# Nomad v1.4.4 可用
# 把 {{ 写成 {{- (或把 }} 写成 -}}) 可以去掉前后的空格和换行
# Renders a Caddyfile that reverse-proxies :8080 to every registered instance of
# the Nomad-native service "httpserver".
template {
  data = <<EOH
:8080 {
reverse_proxy * {
to {{range nomadService "httpserver" }}http://{{ .Address }}:{{ .Port }} {{end }}
# 负载均衡策略
# lb_policy ip_hash
# 检查检查地址/状态/检查间隔时间
# health_uri /health_uri
# health_status 200
# health_interval 500ms
}
}
EOH

  # `destination` is a required template attribute and the block must be closed.
  # NOTE(review): placeholder path — set it to wherever the task reads its Caddyfile.
  destination = "local/Caddyfile"
}
关于升级
# 就是配置文件的data_dir目录,如果升级失败,直接删除重来
/var/lib/nomad
# 升级之后文件报错, 需要重新job init创建配置文件
${attr.consul.version} semver >= 1.7.0
# 查看文件系统
nomad job status
nomad job status NginxLoadBalancer
nomad alloc fs 2c0ddc48 taskNginxLoadBalancer/local/load-balancer.conf
# 或者
find /var/lib/nomad/alloc/|grep load-balancer.conf
nomad alloc logs 2c0ddc48
访问测试
# Linux: request the load balancer every 0.1s and print only the HTTP status code
while true; do curl -sIL -w "%{http_code}\n" -o /dev/null http://192.168.122.204:8080/; sleep 0.1; done
# PowerShell: `;` and Start-Sleep -Milliseconds also work on Windows PowerShell 5.1
# (`&&` needs PowerShell 7+, and `sleep 0.1` is truncated to 0 seconds on 5.1)
1..10000 | foreach { Start-Sleep -Milliseconds 100; curl -sIL -w "%{http_code}\n" -o /dev/null http://192.168.122.204:8080/ }
quick start webserver
# Image to deploy. It is declared in this file because the service tags below
# interpolate var.imageurl, and a job that references an undeclared variable
# fails to parse. The default keeps the original image.
# Override with: nomad job run -var imageurl=<image> <file>
variable "imageurl" {
  description = "Container image for the web-server task"
  default     = "jcleng/adminer:latest"
}

# Quick-start web service: two instances registered in Consul, rolled out with one canary.
job "web-service" {
  # Older Nomad versions need an explicit datacenter name (see: nomad node status)
  datacenters = ["*"]

  group "web" {
    count = 2

    network {
      # Dynamic host port mapped to container port 80
      port "http" {
        to = 80
      }
    }

    service {
      # Tags for the released (stable) allocations
      tags = ["release_tag", "${var.imageurl}"]
      # Tags for canary allocations
      canary_tags = ["canary_tag"]
      name        = "serviceweb"
      port        = "http"
      provider    = "consul"

      check {
        type     = "http"
        path     = "/"
        interval = "2s"
        timeout  = "2s"
      }
    }

    task "web-server" {
      driver = "docker"

      config {
        # Same variable as the service tag, so the tag always names the running image
        image = var.imageurl
        ports = ["http"]
      }

      resources {
        cpu    = 1500 # MHz
        memory = 800  # MB
      }
    }
  }

  # Update strategy
  update {
    # Maximum number of allocations updated in parallel
    max_parallel = 1
    # Minimum time an allocation must stay healthy
    min_healthy_time = "30s"
    # Deadline for an allocation to become healthy
    healthy_deadline = "5m"
    # Deadline for an allocation to be marked healthy before the deployment fails
    progress_deadline = "15m"
    # Roll back automatically
    auto_revert = true
    # Number of canaries
    canary = 1
  }
}
NginxLoadBalancer
# nginx load balancer in front of "serviceweb": stable traffic goes to the
# release_tag instances; requests carrying X-Forwarded-For: canary go to the canary_tag ones.
job "NginxLoadBalancer" {
  datacenters = ["dc1"]

  group "groupNginxLoadBalancer" {
    count = 1

    network {
      # Port mapping
      port "http" {
        # Container side
        to = "80"
        # Host side
        static = "80"
      }
      # port "https" {
      # static = "443"
      # to = "443"
      # }
    }

    service {
      # Service registration provider: nomad or consul.
      # Templates must use the matching function (nomadService for nomad,
      # service for consul) when ranging; otherwise rendering reports:
      # Missing: health.service(<service name>|passing)
      provider = "consul"
      name     = "serviceNginxLoadBalancer"
      port     = "http"
    }

    task "taskNginxLoadBalancer" {
      driver = "docker"

      resources {
        memory = 100
        cpu    = 1500 # MHz
      }

      config {
        # For a private GitLab registry image, configure auth and use an access token as the password.
        # Do not use latest, otherwise the image is pulled on every restart.
        image      = "nginx:v1"
        privileged = false
        # Container ports; only the labels defined in the network block are allowed
        ports = ["http"]
        volumes = [
          "local:/etc/nginx/conf.d",
        ]
      }

      # List services with: nomad job status
      # The name queried in the template is the service.name value.
      # Template references:
      # https://github.com/hashicorp/consul-template#multiple-commands https://github.com/hashicorp/nomad/issues/8137
      template {
        destination   = "local/load-balancer.conf"
        change_mode   = "signal"
        change_signal = "SIGHUP"
        data          = <<EOF
upstream backend {
{{ range service "release_tag.serviceweb" }}
server {{ .Address }}:{{ .Port }};
{{ else }}
server 127.0.0.1:65535; # force a 502
{{ end }}
}
upstream canary {
{{ range service "canary_tag.serviceweb" }}
server {{ .Address }}:{{ .Port }};
{{ else }}
server 127.0.0.1:65535; # force a 502
{{ end }}
}
server {
listen 80;
location / {
# 灰度: X-Forwarded-For:canary
if ($http_x_forwarded_for = "canary") {
proxy_pass http://canary;
}
proxy_pass http://backend;
}
}
EOF
      }
    }
  }
}
配置 acl 授权验证
# 配置
# Enables the ACL system. NOTE(review): presumably belongs in the Nomad agent
# config (/etc/nomad.hcl) — confirm.
acl {
enabled = true
}
nomad status
# Error querying jobs: Unexpected response code: 403 (Permission denied)
nomad acl bootstrap
export NOMAD_TOKEN=e597d2fc-1e9f-3bde-be25-5220f3b0b274
nomad status
# ID Type Priority Status Submit Date
# NginxLoadBalancer service 50 running 2023-10-10T15:08:44+08:00
# web-service service 50 running 2023-10-10T15:13:06+08:00
# web UI
nomad ui -authenticate -show-url
远程连接
docker run --rm \
-e NOMAD_ADDR=http://www.leng2011.icu:4646 \
-e NOMAD_TOKEN=46605821-xxxxxx-4ff77aeaad2a \
--name=nomad docker.io/hashicorp/nomad:latest job status
# 在Nomad中使用 nomad job run 命令运行一个作业时,如果存在灰度发布,命令会等待直到灰度发布完成才会停止。
hcl 中声明变量, 从命令行传入
# Declares the input variable "imageurl" (image address) with a default value.
variable "imageurl" {
description = "镜像地址"
default = "registry.jihulab.com/jcleng/imgsite:latest"
}
# hcl使用
image = "${var.imageurl}"
# 命令行传入
nomad job plan -var imageurl=$NEW_IMG dep.hcl
java
# Runs a jar with the java driver; the jar is fetched as an artifact first.
task "java" {
  # The java driver runs inside a chroot, with the Java executable's environment
  # mapped into that directory: https://developer.hashicorp.com/nomad/docs/drivers/java#chroot
  driver = "java"

  # Jar download, checked against the given md5 checksum.
  artifact {
    source = "http://192.168.20.153:7777/my-project-0.0.1-SNAPSHOT.jar"

    options {
      checksum = "md5:875cb9b0899470b3c374e3666f3d6b93"
    }
  }

  config {
    jvm_options = ["-Xmx202m", "-Xms200m"]
    jar_path    = "local/my-project-0.0.1-SNAPSHOT.jar"
  }
}
traefikLoadBalancer 会自动监听文件变化, 无需配置change_mode/change_signal
# Docker config for the Traefik task: host networking, with the two files under
# local/ mounted at Traefik's config paths.
config {
  image        = "traefik:v2.2"
  network_mode = "host"
  privileged   = true

  volumes = [
    "local/traefik.yml:/etc/traefik/traefik.yml",
    "local/traefik-file-rule.yml:/etc/traefik/traefik-file-rule.yml",
  ]
}
# Traefik dynamic configuration (file provider): TLS certificate, one router,
# and a weighted service split between the release and canary backends.
# The heredoc is YAML, so nesting is significant and is restored here; the
# `else` fallbacks are written as list items because `servers` is a sequence.
template {
  data = <<EOF
tls:
  certificates:
    - certFile: /etc/nginx/crt/ssl.crt
      keyFile: /etc/nginx/crt/key.txt
http:
  routers:
    myMasterServices:
      # 访问/
      rule: "PathPrefix(`/`)"
      service: myWeb
      # 加入TLS
      tls: {}
  services:
    myWeb:
      weighted:
        services:
          - name: releaseWeb
            weight: 3
{{if nomadService "canary.serviceweb"}}
          - name: canaryWeb
            weight: 1
{{end}}
{{if nomadService "release.serviceweb"}}
    releaseWeb:
      loadBalancer:
        servers:
{{ range nomadService "release.serviceweb" }}
          - url: http://{{ .Address }}:{{ .Port }}
{{ else }}
          - url: http://0.0.0.0:1024
{{ end }}
{{end}}
{{if nomadService "canary.serviceweb"}}
    canaryWeb:
      loadBalancer:
        servers:
{{ range nomadService "canary.serviceweb" }}
          - url: http://{{ .Address }}:{{ .Port }}
{{ else }}
          - url: http://0.0.0.0:1024
{{ end }}
{{end}}
EOF

  destination = "local/traefik-file-rule.yml"
}
# Traefik static configuration: the web entry point, the dashboard/API, and the
# file provider that loads the dynamic rules file.
# The heredoc is YAML, so nesting is significant and is restored here.
template {
  data = <<EOF
entryPoints:
  web:
    address: :80
  # websecure:
  #   address: :443
api:
  insecure: true
  dashboard: true
providers:
  # 文件配置示例: https://doc.traefik.io/traefik/providers/file/#filename
  file:
    filename: /etc/traefik/traefik-file-rule.yml
EOF

  destination = "local/traefik.yml"
}
# 语法
https://pkg.go.dev/text/template
# if 语句
{{if nomadService "release.serviceweb"}}
{{end}}
# range遍历语句
# The else branch renders when the range is empty; it must emit a list item ("- url") as well
{{ range nomadService "canary.serviceweb" }}
- url: http://{{ .Address }}:{{ .Port }}
{{ else }}
- url: http://0.0.0.0:1024
{{ end }}