OCP升级(3.6->3.7)
有个好文档还是靠谱很多,感谢同事的文档。升级步骤记录如下:
1.检查现有环境
[root@master ~]# etcd --version etcd Version: 3.2.22 Git SHA: 1674e68 Go Version: go1.9.2 Go OS/Arch: linux/amd64 [root@master ~]# etcdctl2 cluster-health member a4e80e96ea75f7c8 is healthy: got healthy result from https://192.168.0.103:2379 cluster is healthy [root@master ~]# etcdctl2 member list a4e80e96ea75f7c8: name=master.example.com peerURLs=https://192.168.0.103:2380 clientURLs=https://192.168.0.103:2379 isLeader=true [root@master ~]# oc get dc/router -n default NAME REVISION DESIRED CURRENT TRIGGERED BY router 4 2 2 config [root@master ~]# oc get dc/docker-registry -n default NAME REVISION DESIRED CURRENT TRIGGERED BY docker-registry 1 1 1 config [root@master ~]# oc get node --show-labels NAME STATUS AGE VERSION LABELS master.example.com Ready 14h v1.6.1+5115d708d7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=master.example.com,openshift-infra=apiserver,region=infra,router=true,zone=default node1.example.com Ready 14h v1.6.1+5115d708d7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node1.example.com,region=infra,router=true,zone=default node2.example.com NotReady 14h v1.6.1+5115d708d7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node2.example.com,region=infra,zone=default [root@master ~]# oc get pv No resources found. [root@master ~]# oc get pvc No resources found.
2.备份ansible脚本
cd /usr/share/ansible
cp -r plugins/ plugins-3.6.bak
cp -r openshift-ansible/ openshift-ansible-3.6.bak
ll
total 0
drwxr-xr-x. 5 root root 51 Jul 27 19:21 openshift-ansible
drwxr-xr-x. 5 root root 51 Jul 28 01:41 openshift-ansible-3.6.bak
drwxr-xr-x. 2 root root 50 Jul 27 19:21 plugins
drwxr-xr-x. 2 root root 50 Jul 28 01:40 plugins-3.6.bak
drwxr-xr-x. 2 root root 6 Jan 16 2018 roles
3.备份etcd数据
cp -r /etc/etcd /etc/etcd.bak
etcdctl2 backup --data-dir /var/lib/etcd --backup-dir /tmp/etcd-data
cp /var/lib/etcd/member/snap/db /tmp/etcd-data
ll /etc/etcd.bak
total 48
drwx------. 5 root root 212 Jul 28 01:47 ca
-rw-------. 1 root root 1895 Jul 28 01:47 ca.crt
-rw-r--r--. 1 root root 1526 Jul 28 01:47 etcd.conf
-rw-r--r--. 1 root root 1686 Jul 28 01:47 etcd.conf.32617.2018-07-28@00:22:18~
drwx------. 8 root root 4096 Jul 28 01:47 generated_certs
-rw-------. 1 root root 5916 Jul 28 01:47 peer.crt
-rw-r--r--. 1 root root 1005 Jul 28 01:47 peer.csr
-rw-------. 1 root root 1704 Jul 28 01:47 peer.key
-rw-------. 1 root root 5873 Jul 28 01:47 server.crt
-rw-r--r--. 1 root root 1005 Jul 28 01:47 server.csr
-rw-------. 1 root root 1704 Jul 28 01:47 server.key
ll /var/lib/etcd/member/snap/db /tmp/etcd-data
-rw-------. 1 etcd etcd 25952256 Jul 28 01:48 /var/lib/etcd/member/snap/db
/tmp/etcd-data:
total 16320
-rw-------. 1 root root 25952256 Jul 28 01:48 db
drwx------. 4 root root 29 Jul 28 01:47 member
4.备份配置文件
master节点
cp -r /etc/origin/master /etc/origin/master.bak
cp -r /etc/origin/node /etc/origin/node.bak
cp -r /etc/sysconfig/atomic-openshift-master-api /etc/sysconfig/atomic-openshift-master-api.bak
cp -r /etc/sysconfig/atomic-openshift-master-controllers /etc/sysconfig/atomic-openshift-master-controller.bak
cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak
ll /etc/origin/master.bak
ll /etc/origin/node /etc/origin/node.bak
ll /etc/sysconfig/atomic-openshift-master-api.bak
ll /etc/sysconfig/atomic-openshift-master-controller.bak
ll /etc/sysconfig/atomic-openshift-node.bak
node节点
cp -r /etc/origin/node /etc/origin/node.bak
cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak
ll /etc/origin/node.bak
ll /etc/sysconfig/atomic-openshift-node.bak
备份yum repo文件
cp /etc/yum.repos.d/ocp.repo /etc/yum.repos.d/ocp.repo.bak
5.修改yum repository文件
当然,需要先把3.7的Package导入本地yum仓库
[root@master ~]# cat /etc/yum.repos.d/ocp.repo
[local-rhel-7-server-rpms]
name=rhel-7-server-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-rpms
enabled=1
gpgcheck=0

[local-rhel-7-server-extras-rpms]
name=rhel-7-server-extras-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-extras-rpms
enabled=1
gpgcheck=0

[local-rhel-7-fast-datapath-rpms]
name=rhel-7-fast-datapath-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-fast-datapath-rpms
enabled=1
gpgcheck=0

[local-rhel-7-server-ose-3.7-rpms]
name=rhel-7-server-ose-3.7-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-ose-3.7-rpms
enabled=1
gpgcheck=0
yum clean all
yum repolist
yum update -y
6.升级ansible
yum update atomic-openshift-utils ansible
rpm -qa | grep atomic-openshift-utils
atomic-openshift-utils-3.7.57-1.git.33.cf01e48.el7.noarch
7.备份并更新模板
mv /usr/share/openshift/examples /usr/share/openshift/examples.3.6
mkdir -p /usr/share/openshift/examples
cp -R /usr/share/ansible/openshift-ansible/roles/openshift_examples/files/examples/v3.7/* /usr/share/openshift/examples/
8.导入镜像
导入镜像过程略,完成后需要对registry-console进行重新tag和push
docker tag registry.example.com/openshift3/registry-console:v3.7.64 registry.example.com/openshift3/registry-console:v3.7
docker push registry.example.com/openshift3/registry-console:v3.7
10.修改hosts文件
/etc/ansible/hosts文件
[OSEv3:children]
masters
nodes
etcd
nfs

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=openshift-enterprise
openshift_release=v3.7
osm_use_cockpit=true
osm_cockpit_plugins=['cockpit-kubernetes']
openshift_cockpit_deployer_prefix='openshift3/'
openshift_cockpit_deployer_version='v3.7.64'
osm_cluster_network_cidr=10.128.0.0/14
openshift_portal_net=172.30.0.0/16
openshift_master_api_port=8443
openshift_master_console_port=8443
openshift_enable_service_catalog=true
openshift_service_catalog_image_prefix=registry.example.com/openshift3/ose-
openshift_service_catalog_image_version=v3.7.64
ansible_service_broker_image_prefix=registry.example.com/openshift3/ose-
ansible_service_broker_etcd_image_prefix=registry.example.com/rhel7/
template_service_broker_prefix=registry.example.com/openshift3/
oreg_url=registry.example.com/openshift3/ose-${component}:${version}
openshift_examples_modify_imagestreams=true
openshift_clock_enabled=true
openshift_metrics_storage_kind=nfs
openshift_metrics_install_metrics=true
openshift_metrics_storage_access_modes=['ReadWriteOnce']
openshift_metrics_storage_host=nfs.example.com
openshift_metrics_storage_nfs_directory=/exports
openshift_metrics_storage_volume_name=metrics
openshift_metrics_storage_volume_size=10Gi
openshift_metrics_hawkular_hostname=hawkular-metrics.apps.example.com
openshift_metrics_cassandra_storage_type=emptydir
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_hosted_metrics_deploy=true
openshift_hosted_metrics_public_url=https://hawkular-metrics.apps.example.com/hawkular/metrics
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_metrics_image_version=v3.7.64
openshift_template_service_broker_namespaces=['openshift']
template_service_broker_selector={"node": "true"}
openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}]
# Default login account: admin / handhand
openshift_master_htpasswd_users={'admin': '$apr1$gfaL16Jf$c.5LAvg3xNDVQTkk6HpGB1'}
#openshift_repos_enable_testing=true
openshift_disable_check=docker_image_availability,disk_availability,memory_availability,docker_storage
docker_selinux_enabled=false
openshift_docker_options=" --selinux-enabled --insecure-registry 172.30.0.0/16 --log-driver json-file --log-opt max-size=50M --log-opt max-file=3 --insecure-registry registry.example.com --add-registry registry.example.com"
openshift_docker_additional_registries=registry.example.com
openshift_docker_insecure_registries=registry.example.com
osm_etcd_image=rhel7/etcd
openshift_logging_image_prefix=registry.example.com/openshift3/
openshift_hosted_router_selector='region=infra,router=true'
openshift_master_default_subdomain=app.example.com

# host group for masters
[masters]
master.example.com

# host group for etcd
[etcd]
master.example.com

# host group for nodes, includes region info
[nodes]
master.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node1.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node2.example.com openshift_node_labels="{'region': 'infra', 'zone': 'default'}" openshift_schedulable=true

[nfs]
nfs.example.com
11.升级前的检查
ansible-playbook --tags pre_upgrade /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade.yml
检查时遇到一个问题:提示master、node1、node2节点的Docker版本需要1.12以上,但这些节点的Docker版本实际上已经是1.13.1
解决办法:修改/etc/yum.conf文件,注释掉下面这一行(exclude配置),以跳过该检查
#exclude= docker*1.20* docker*1.19* docker*1.18* docker*1.17* docker*1.16* docker*1.15* docker*1.14* docker*1.13*
12.升级
升级master节点
ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_control_plane.yml | tee /tmp/upgrade_control_plane_to_3_7.log;
升级完后显示
TASK [Warn if shared-resource-viewer could not be updated] ************************************************************************************************************** task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml:134 skipping: [master.example.com] => {"changed": false, "skip_reason": "Conditional result was False"} META: ran handlers PLAY RECAP ************************************************************************************************************************************************************** localhost : ok=26 changed=0 unreachable=0 failed=0 master.example.com : ok=410 changed=87 unreachable=0 failed=0 nfs.example.com : ok=33 changed=2 unreachable=0 failed=0 node1.example.com : ok=50 changed=2 unreachable=0 failed=0 node2.example.com : ok=50 changed=2 unreachable=0 failed=0
检查
[root@master ansible]# oc get nodes NAME STATUS AGE VERSION master.example.com Ready 3h v1.7.6+a08f5eeb62 node1.example.com Ready 3h v1.6.1+5115d708d7 node2.example.com Ready 3h v1.6.1+5115d708d7 [root@master ansible]# oc get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE default docker-registry-2-94zld 1/1 Running 0 6m default registry-console-2-lmhgx 1/1 Running 0 6m default router-2-8hnmz 1/1 Running 0 6m default router-2-g6tlm 1/1 Running 0 5m kube-service-catalog apiserver-z6nmz 1/1 Running 4 2h kube-service-catalog controller-manager-d2jgc 0/1 CrashLoopBackOff 9 2h openshift-infra hawkular-cassandra-1-gp4n9 1/1 Running 0 10m openshift-infra hawkular-metrics-4j828 1/1 Running 1 2h openshift-infra heapster-rgwrw 1/1 Running 6 3h [root@master ansible]# oc version oc v3.7.64 kubernetes v1.7.6+a08f5eeb62 features: Basic-Auth GSSAPI Kerberos SPNEGO Server https://master.example.com:8443 openshift v3.7.64 kubernetes v1.7.6+a08f5eeb62
升级node节点
ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_nodes.yml -e openshift_upgrade_nodes_serial=1 | tee /tmp/upgrade_node_to_3_7.log;
升级完成后提示
TASK [include] ********************************************************************************************************************************************************** task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml:83 skipping: [node2.example.com] => {"changed": false, "skip_reason": "Conditional result was False"} META: ran handlers PLAY RECAP ************************************************************************************************************************************************************** localhost : ok=12 changed=0 unreachable=0 failed=0 master.example.com : ok=76 changed=4 unreachable=0 failed=0 nfs.example.com : ok=33 changed=2 unreachable=0 failed=0 node1.example.com : ok=174 changed=30 unreachable=0 failed=0 node2.example.com : ok=174 changed=30 unreachable=0 failed=0
检查
[root@master ansible]# oc get nodes NAME STATUS AGE VERSION master.example.com Ready 3h v1.7.6+a08f5eeb62 node1.example.com Ready 3h v1.7.6+a08f5eeb62 node2.example.com Ready 3h v1.7.6+a08f5eeb62 [root@master ansible]# oc get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE default docker-registry-2-x7sqf 1/1 Running 0 4m default registry-console-2-mslwb 1/1 Running 0 4m default router-2-79qjr 1/1 Running 0 8m default router-2-8hnmz 1/1 Running 0 20m kube-service-catalog apiserver-z6nmz 1/1 Running 4 3h kube-service-catalog controller-manager-d2jgc 0/1 Error 12 2h openshift-infra hawkular-cassandra-1-qg2bj 1/1 Running 0 8m openshift-infra hawkular-metrics-bqqm8 0/1 Running 0 4m openshift-infra heapster-zt87w 1/1 Running 0 8m