51cto_Kubernetes Full-Stack Architect, Chapter 2: Installing a highly available Kubernetes 1.20 cluster with kubeadm
2.1 Basic environment configuration
CentOS 7.x
High-availability Kubernetes cluster plan:
| Hostname | IP address | Role | Notes |
|---|---|---|---|
| k8s-master01~03 | 10.11.1.108, 10.11.1.109, 10.11.1.110 | 3 master nodes | |
| k8s-master-LoadBalancer | 10.11.1.236 | keepalived virtual IP | Alternatives: hardware load balancer (e.g. F5), Alibaba Cloud SLB, Tencent Cloud ELB, or KeepAlived+HAProxy |
| k8s-node01~02 | 10.11.1.111, 10.11.1.112 | 2 worker nodes | |
| Configuration | Value |
|---|---|
| OS version | CentOS 7.9 x64 |
| Docker version | 19.03.x |
| Pod CIDR | 172.168.0.0/12 |
| Service CIDR | 10.96.0.0/12 |
Notes on choosing the VIP (virtual IP)
The VIP must not duplicate any IP already in use on the internal network; a simple test is that the candidate address does not answer ping. The VIP must also be on the same LAN as the cluster hosts. In short: pick an unused address from the hosts' own subnet.
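A minimal sketch of that check (my addition, not from the course), assuming the iputils arping tool is available and that enp0s3 is the host's LAN interface:
check_vip_free.sh
#!/bin/bash
#Candidate VIP and interface are examples; adjust to your LAN.
VIP=10.11.1.236
IFACE=enp0s3
if ping -c 2 -W 1 "$VIP" &>/dev/null; then
    echo "$VIP answers ping: already in use, pick another address"
elif arping -c 2 -I "$IFACE" "$VIP" &>/dev/null; then
    echo "$VIP answers ARP: already in use, pick another address"
else
    echo "$VIP appears to be free"
fi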
set_hostname
##k8s-master-01
hostnamectl set-hostname k8s-master-01
##k8s-master-02
hostnamectl set-hostname k8s-master-02
##k8s-master-03
hostnamectl set-hostname k8s-master-03
##k8s-node-01
hostnamectl set-hostname k8s-node-01
##k8s-node-02
hostnamectl set-hostname k8s-node-02
set_hosts.sh
#curl https://gitee.com/k08s/k8s_note/raw/dev/51cto_Kubernetes/set_hosts.sh | bash
sed -i '/k8s/d' /etc/hosts
cat << EOF >> /etc/hosts
10.11.1.108 k8s-master-01
10.11.1.109 k8s-master-02
10.11.1.110 k8s-master-03
10.11.1.111 k8s-node-01
10.11.1.112 k8s-node-02
EOF
close_firewall.sh
systemctl disable --now firewalld
systemctl disable --now NetworkManager
systemctl disable --now dnsmasq
disable_selinux.sh
setenforce 0
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
close_swap.sh
swapoff -a && sysctl -w vm.swappiness=0
sed -i.bak -r 's/(.+ swap .+)/#\1/' /etc/fstab
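A quick verification sketch (my addition): confirm swap is off now and disabled in fstab for future boots:
verify_swap_off.sh
swapon --show            #should print nothing
free -h | grep -i swap   #should show 0B used/total
grep swap /etc/fstab     #the swap line should be commented out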
setup_ntpdate.sh
rpm -ivh http://mirrors.wlnmp.com/centos/wlnmp-release-centos.noarch.rpm
yum -y install ntpdate
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' > /etc/timezone
ntpdate time2.aliyun.com
echo "*/5 * * * * root ntpdate time2.aliyun.com " >> /etc/crontab
# echo "*/5 * * * * ntpdate time2.aliyun.com" >> /var/spool/cron/root
ulimit_set.sh
ulimit -SHn 65535
cat <<EOF>> /etc/security/limits.conf
#hard must be >= soft; the original hard value 131072 was below the soft limit
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
keygen_util.sh
#Run only on k8s-master-01
#TODO: cfssl? or ssh-keygen?
#TODO: avoid the interactive prompts and password entry? (see the sketch after this script)
ssh-keygen -t rsa
for i in k8s-master-01 k8s-master-02 k8s-master-03 k8s-node-01 k8s-node-02 ;
do ssh-copy-id -i .ssh/id_rsa.pub $i;
done
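If you want the key setup fully non-interactive (the TODO above), one possible sketch, assuming a uniform root password on all nodes and that installing sshpass is acceptable in your environment:
keygen_noninteractive.sh
#!/bin/bash
#-N "" sets an empty passphrase, -f skips the path prompt
[ -f /root/.ssh/id_rsa ] || ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa
yum -y install sshpass
read -r -s -p "root password for all nodes: " PASS; echo
for i in k8s-master-01 k8s-master-02 k8s-master-03 k8s-node-01 k8s-node-02; do
    #StrictHostKeyChecking=no suppresses the first-connection host-key prompt
    sshpass -p "$PASS" ssh-copy-id -o StrictHostKeyChecking=no -i /root/.ssh/id_rsa.pub "$i"
done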
set_centos7_repo_using_aliyun.sh
#curl https://gitee.com/k08s/k8s_note/raw/dev/51cto_Kubernetes/set_centos7_repo_using_aliyun.sh | bash
#https://developer.aliyun.com/mirror/centos
base_repo=/etc/yum.repos.d/CentOS-Base.repo
mv $base_repo ${base_repo}_`date +%s`
curl -o $base_repo https://mirrors.aliyun.com/repo/Centos-7.repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
os_upgrade.sh
yum update -y --exclude=kernel* && reboot
2.2 Kernel configuration
Upgrade the Linux kernel to 4.18+.
Author's guess: kernels below this version may hit assorted bugs in combination with Docker 19.03?
linux_kernel_download.sh
#Run only on k8s-master-01
#TODO: avoid password entry
cd /root/
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-4.19.12-1.el7.elrepo.x86_64.rpm
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-devel-4.19.12-1.el7.elrepo.x86_64.rpm
for i in k8s-master-01 k8s-master-02 k8s-master-03 k8s-node-01 k8s-node-02 ;
do scp kernel-ml-4.19.12-1.el7.elrepo.x86_64.rpm kernel-ml-devel-4.19.12-1.el7.elrepo.x86_64.rpm $i:/root/;
done
linux_kernel_upgrade.sh
#Show the current default kernel
grubby --default-kernel
#/boot/vmlinuz-3.10.0-1160.42.2.el7.x86_64
cd /root/ && yum localinstall -y kernel-ml-*
#If you rebooted any node now, the first grub menu entry would be kernel-4.19,
#but the default selection would still be the old kernel-3.10.
#The 0 in grub2-set-default 0 refers to that first grub menu entry (kernel-4.19).
grub2-set-default 0 && grub2-mkconfig -o /etc/grub2.cfg
grubby --args="user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)"
#Show the default kernel again
grubby --default-kernel
#The new kernel takes effect after the reboot
reboot
#uname -a
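Once every node has rebooted, a small loop (my addition) run from k8s-master-01 confirms all nodes are on the new kernel:
verify_kernel.sh
for i in k8s-master-01 k8s-master-02 k8s-master-03 k8s-node-01 k8s-node-02; do
    echo -n "$i: "; ssh "$i" uname -r   #expect 4.19.12-1.el7.elrepo.x86_64
done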
ipvsadm_install.sh
#Production environments should use ipvs mode; iptables mode is not recommended.
yum install -y ipvsadm ipset sysstat conntrack libseccomp
#Linux kernel 4.19+: nf_conntrack
#Linux kernel < 4.19: nf_conntrack_ipv4
#We are on kernel 4.19+, so use nf_conntrack.
#Reference: https://docs.projectcalico.org/getting-started/kubernetes/requirements
cat <<EOF> /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
systemctl enable --now systemd-modules-load.service
#Check that the kernel modules are loaded:
lsmod | grep -e ip_vs -e nf_conntrack
k8s_kernel_param_set.sh
Enable the kernel parameters required by a Kubernetes cluster; run on all nodes:
#The net.bridge.* keys below require the br_netfilter module (assumption: it is not loaded by default on CentOS 7)
modprobe br_netfilter
cat <<EOF> /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
fs.may_detach_mounts=1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time=600
net.ipv4.tcp_keepalive_probes=3
net.ipv4.tcp_keepalive_intvl=15
net.ipv4.tcp_max_tw_buckets=36000
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_max_orphans=327680
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_max_syn_backlog=16384
net.ipv4.ip_conntrack_max=65536
net.ipv4.tcp_timestamps=0
net.core.somaxconn=16384
EOF
sysctl --system
reboot
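After the reboot it is worth confirming the modules and parameters survived (a quick sketch, my addition):
verify_kernel_params.sh
lsmod | grep -e ip_vs -e nf_conntrack
#both keys below should print "= 1"
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables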
2.3 Install the basic Kubernetes components
install_docker-ce_using_aliyun_mirror.sh
#curl https://gitee.com/k08s/k8s_note/raw/dev/51cto_Kubernetes/install_docker-ce_using_aliyun_mirror.sh | bash
#https://developer.aliyun.com/mirror/docker-ce
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
yum makecache fast
yum list docker-ce.x86_64 --showduplicates | sort -r
yum -y install docker-ce-19.03.9-3.el7.centos
#yum -y install docker-ce-19.03.*
#service docker start
mkdir -p /etc/docker/
cat <<EOF> /etc/docker/daemon.json
{
"exec-opts":["native.cgroupdriver=systemd"]
}
EOF
systemctl daemon-reload && systemctl enable --now docker
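A quick sanity check (my addition) that Docker came up with the systemd cgroup driver, which must match the kubelet setting configured below:
verify_docker.sh
docker --version                                    #expect 19.03.x
docker info 2>/dev/null | grep -i "cgroup driver"   #expect: Cgroup Driver: systemd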
install_k8s_using_aliyun_mirror.sh
#curl https://gitee.com/k08s/k8s_note/raw/dev/51cto_Kubernetes/install_k8s_using_aliyun_mirror.sh | bash
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
#yum list kubeadm --showduplicates | sort -r
#Install kubeadm on all nodes; kubelet and kubectl are installed automatically as dependencies.
yum install -y --nogpgcheck kubeadm
cat <<EOF> /etc/sysconfig/kubelet
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.2"
EOF
systemctl daemon-reload
systemctl enable --now kubelet
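A version check (my addition); note that kubelet crash-looping at this point is normal and settles once kubeadm init/join has run:
verify_k8s_packages.sh
kubeadm version -o short          #e.g. v1.22.1; use this value for kubernetesVersion below
kubelet --version
systemctl status kubelet --no-pager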
2.4 Install the high-availability components
keepalived_haproxy_install.sh
#Install KeepAlived and HAProxy on all master nodes
yum -y install keepalived haproxy
HAProxy_set.sh
#Run on all master nodes.
#NOTE: the 10.0.2.x addresses below are the author's lab network; substitute your own
#master IPs (10.11.1.108~110 in the plan above) and VIP if you follow that plan.
mkdir -p /etc/haproxy
cat <<EOF> /etc/haproxy/haproxy.cfg
global
    maxconn 2000
    ulimit-n 16384
    log 127.0.0.1 local0 err
    stats timeout 30s

defaults
    log global
    mode http
    option httplog
    timeout connect 5000
    timeout client 50000
    timeout server 50000
    timeout http-request 15s
    timeout http-keep-alive 15s

frontend monitor-in
    bind *:33305
    mode http
    option httplog
    monitor-uri /monitor

frontend k8s-master
    bind 0.0.0.0:16443
    bind 127.0.0.1:16443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    default_backend k8s-master

backend k8s-master
    mode tcp
    option tcplog
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server k8s-master-01 10.0.2.18:6443 check
    server k8s-master-02 10.0.2.19:6443 check
    server k8s-master-03 10.0.2.20:6443 check
EOF
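Before starting the service, HAProxy can validate the file itself (a quick check, my addition):
haproxy_check.sh
haproxy -c -f /etc/haproxy/haproxy.cfg   #prints "Configuration file is valid" on success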
KeepAlived_master_01.sh
#Run on k8s-master-01. interface and mcast_src_ip must match this node;
#auth_pass must be identical on all three masters.
cat <<EOF> /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state MASTER
    interface enp0s3
    mcast_src_ip 10.0.2.18
    virtual_router_id 51
    priority 101
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        10.0.2.236
    }
    track_script {
        chk_apiserver
    }
}
EOF
KeepAlived_master_02.sh
#Run on k8s-master-02. Note the BACKUP state, this node's mcast_src_ip,
#the lower priority, and the same auth_pass as the other masters.
cat <<EOF> /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state BACKUP
    interface enp0s3
    mcast_src_ip 10.0.2.19
    virtual_router_id 51
    priority 100
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        10.0.2.236
    }
    track_script {
        chk_apiserver
    }
}
EOF
KeepAlived_master_03.sh
#Run on k8s-master-03. Same notes as k8s-master-02.
cat <<EOF> /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state BACKUP
    interface enp0s3
    mcast_src_ip 10.0.2.20
    virtual_router_id 51
    priority 100
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        10.0.2.236
    }
    track_script {
        chk_apiserver
    }
}
EOF
check_apiserver.sh
#Every master node gets check_apiserver.sh.
#The heredoc delimiter is quoted ('EOF') so that $(...) and $err are written into the
#file literally instead of being expanded by the shell that creates it.
cat <<'EOF' > /etc/keepalived/check_apiserver.sh
#!/bin/bash
#If haproxy is down for 4 consecutive checks, stop KeepAlived on this master;
#once it stops, KeepAlived on another master takes over the VIP.
err=0
for k in $(seq 1 4)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
haproxy_keepalived_boot.sh
#Run on all master nodes (each master runs its own haproxy and keepalived instance).
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived
#On the master currently holding the VIP (initially k8s-master-01):
#ip address   #interface enp0s3 gains an extra address, 10.0.2.236: this is the virtual IP
#/var/log/messages will contain lines like: Sending gratuitous ARP on enp0s3 for 10.0.2.236
#From the other nodes, e.g. k8s-master-02 and k8s-master-03, test the virtual IP 10.0.2.236:
#ping 10.0.2.236
#telnet 10.0.2.236 16443
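A scripted version of those checks (my addition), runnable from any node; /dev/tcp is a bash built-in, so no telnet package is needed:
verify_vip.sh
#!/bin/bash
VIP=10.0.2.236
ping -c 2 "$VIP"
#the TCP handshake should complete even though no apiserver backend is up yet
timeout 2 bash -c "</dev/tcp/$VIP/16443" && echo "VIP port 16443 reachable"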
2.5 Cluster initialization
init_cluster__kubeadm_init.sh
#Is this one-liner good enough? Unknown; it was not used here, the config-file approach below was used instead.
#kubeadm init --control-plane-endpoint "10.0.2.236:16443" --upload-certs
kubeadm-config.yaml
#/root/kubeadm-config.yaml: this file exists on all nodes.
#Set kubernetesVersion to the version reported by kubeadm version.
#cat <<EOF> /root/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.0.2.108
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: k8s-master-01
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 10.0.2.236
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 10.0.2.236:16443
controllerManager: {}
dns:
  #The Aliyun mirror has coredns:1.8.4 but not coredns:v1.8.4, hence the un-prefixed imageTag
  imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
  imageTag: 1.8.4
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.22.1
networking:
  dnsDomain: cluster.local
  podSubnet: 172.168.0.0/12
  serviceSubnet: 10.96.0.0/12
scheduler: {}
#EOF
kubeadm-config.yaml after kubeadm config migrate: kubeadm-config-new.yaml
# /root/kubeadm-config-new.yaml
#Reference: https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta3/#kubeadm-k8s-io-v1beta3-ImageMeta
#Set kubernetesVersion to the version reported by kubeadm version.
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.11.1.108
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  imagePullPolicy: IfNotPresent
  name: k8s-master-01
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 10.11.1.236
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 10.11.1.236:16443
controllerManager: {}
dns:
  #The Aliyun mirror has coredns:1.8.4 but not coredns:v1.8.4, hence the un-prefixed imageTag
  imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
  imageTag: 1.8.4
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.22.1
networking:
  dnsDomain: cluster.local
  podSubnet: 172.168.0.0/12
  serviceSubnet: 10.96.0.0/12
scheduler: {}
init_cluster.sh
#init_cluster.sh: unless noted otherwise, run the following on all nodes.
#The Aliyun mirror has coredns:1.8.4 but not coredns:v1.8.4:
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.8.4
#2021-09-16 08:49 progress: worked around the missing coredns:v1.8.4 image.
#Reference: https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta3/#kubeadm-k8s-io-v1beta3-ImageMeta
kubeadm config migrate --old-config /root/kubeadm-config.yaml --new-config /root/kubeadm-config-new.yaml
##2021-09-15 progress: 2-7 Kubeadm cluster initialization.mp4
kubeadm config images pull --config /root/kubeadm-config-new.yaml
systemctl enable --now kubelet
#Run kubeadm init only on k8s-master-01:
kubeadm init --config /root/kubeadm-config-new.yaml --upload-certs
#If initialization fails, reset and try again:
#kubeadm reset -f ; ipvsadm --clear ; rm -fr ~/.kube
#On k8s-master-01, set up kubectl access (from the kubeadm init output):
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
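The join commands for the remaining masters and workers are printed once by kubeadm init; if they are lost, they can be regenerated on k8s-master-01 (a sketch based on standard kubeadm commands):
print_join_commands.sh
#worker join command (token + CA cert hash):
kubeadm token create --print-join-command
#for extra control-plane nodes, re-upload the certs to get a fresh certificate key:
kubeadm init phase upload-certs --upload-certs
#then append to the worker join command: --control-plane --certificate-key <key printed above>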