OCP升级(3.6->3.7)

有个好文档还是靠谱很多,感谢同事的文档。升级步骤记录如下

1.检查现有环境

[root@master ~]# etcd --version
etcd Version: 3.2.22
Git SHA: 1674e68
Go Version: go1.9.2
Go OS/Arch: linux/amd64
[root@master ~]# etcdctl2 cluster-health
member a4e80e96ea75f7c8 is healthy: got healthy result from https://192.168.0.103:2379
cluster is healthy
[root@master ~]# etcdctl2 member list
a4e80e96ea75f7c8: name=master.example.com peerURLs=https://192.168.0.103:2380 clientURLs=https://192.168.0.103:2379 isLeader=true
[root@master ~]# oc get dc/router -n default
NAME      REVISION   DESIRED   CURRENT   TRIGGERED BY
router    4          2         2         config
[root@master ~]# oc get dc/docker-registry -n default
NAME              REVISION   DESIRED   CURRENT   TRIGGERED BY
docker-registry   1          1         1         config
[root@master ~]# oc get node --show-labels
NAME                 STATUS     AGE       VERSION             LABELS
master.example.com   Ready      14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=master.example.com,openshift-infra=apiserver,region=infra,router=true,zone=default
node1.example.com    Ready      14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node1.example.com,region=infra,router=true,zone=default
node2.example.com    NotReady   14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node2.example.com,region=infra,zone=default

[root@master ~]# oc get pv
No resources found.
[root@master ~]# oc get pvc
No resources found.

 

2.备份ansible脚本

cd /usr/share/ansible
cp -r plugins/ plugins-3.6.bak
cp -r openshift-ansible/ openshift-ansible-3.6.bak
ll
total 0
drwxr-xr-x. 5 root root 51 Jul 27 19:21 openshift-ansible
drwxr-xr-x. 5 root root 51 Jul 28 01:41 openshift-ansible-3.6.bak
drwxr-xr-x. 2 root root 50 Jul 27 19:21 plugins
drwxr-xr-x. 2 root root 50 Jul 28 01:40 plugins-3.6.bak
drwxr-xr-x. 2 root root  6 Jan 16  2018 roles

 

3.备份etcd数据

cp -r /etc/etcd /etc/etcd.bak
etcdctl2 backup --data-dir /var/lib/etcd --backup-dir /tmp/etcd-data
cp /var/lib/etcd/member/snap/db /tmp/etcd-data
ll /etc/etcd.bak
total 48
drwx------. 5 root root  212 Jul 28 01:47 ca
-rw-------. 1 root root 1895 Jul 28 01:47 ca.crt
-rw-r--r--. 1 root root 1526 Jul 28 01:47 etcd.conf
-rw-r--r--. 1 root root 1686 Jul 28 01:47 etcd.conf.32617.2018-07-28@00:22:18~
drwx------. 8 root root 4096 Jul 28 01:47 generated_certs
-rw-------. 1 root root 5916 Jul 28 01:47 peer.crt
-rw-r--r--. 1 root root 1005 Jul 28 01:47 peer.csr
-rw-------. 1 root root 1704 Jul 28 01:47 peer.key
-rw-------. 1 root root 5873 Jul 28 01:47 server.crt
-rw-r--r--. 1 root root 1005 Jul 28 01:47 server.csr
-rw-------. 1 root root 1704 Jul 28 01:47 server.key

ll /var/lib/etcd/member/snap/db /tmp/etcd-data
-rw-------. 1 etcd etcd 25952256 Jul 28 01:48 /var/lib/etcd/member/snap/db

/tmp/etcd-data:
total 16320
-rw-------. 1 root root 25952256 Jul 28 01:48 db
drwx------. 4 root root       29 Jul 28 01:47 member

 

4.备份配置文件

master节点

cp -r /etc/origin/master /etc/origin/master.bak
cp -r /etc/origin/node /etc/origin/node.bak
cp -r /etc/sysconfig/atomic-openshift-master-api /etc/sysconfig/atomic-openshift-master-api.bak
cp -r /etc/sysconfig/atomic-openshift-master-controllers  /etc/sysconfig/atomic-openshift-master-controller.bak
cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak

ll /etc/origin/master.bak
ll /etc/origin/node /etc/origin/node.bak
ll /etc/sysconfig/atomic-openshift-master-api.bak
ll /etc/sysconfig/atomic-openshift-master-controller.bak
ll /etc/sysconfig/atomic-openshift-node.bak

 

node节点

cp -r /etc/origin/node /etc/origin/node.bak
cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak

ll /etc/origin/node.bak
ll /etc/sysconfig/atomic-openshift-node.bak

 

备份yum repo文件

cp /etc/yum.repos.d/ocp.repo /etc/yum.repos.d/ocp.repo.bak

 

5.修改yum repository文件

当然先需要把Package导入

[root@master ~]# cat /etc/yum.repos.d/ocp.repo
[local-rhel-7-server-rpms]
name=rhel-7-server-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-rpms
enabled=1
gpgcheck=0
[local-rhel-7-server-extras-rpms]
name=rhel-7-server-extras-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-extras-rpms
enabled=1
gpgcheck=0
[local-rhel-7-fast-datapath-rpms]
name=rhel-7-fast-datapath-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-fast-datapath-rpms
enabled=1
gpgcheck=0
[local-rhel-7-server-ose-3.7-rpms]
name=rhel-7-server-ose-3.7-rpms
baseurl=http://192.168.56.103:8080/repo/rhel-7-server-ose-3.7-rpms
enabled=1
gpgcheck=0

 

yum clean all
yum repolist

yum update -y

 

6.升级ansible

yum update atomic-openshift-utils ansible
rpm -qa | grep atomic-openshift-utils
atomic-openshift-utils-3.7.57-1.git.33.cf01e48.el7.noarch

 

7.备份模板

mv /usr/share/openshift/examples /usr/share/openshift/examples.3.6
mkdir -p /usr/share/openshift/examples
cp -R /usr/share/ansible/openshift-ansible/roles/openshift_examples/files/examples/v3.7/* /usr/share/openshift/examples/

 

8.导入镜像

导入镜像过程略,完成后需要对registry-console进行重新tag和push

docker tag registry.example.com/openshift3/registry-console:v3.7.64 registry.example.com/openshift3/registry-console:v3.7

docker push registry.example.com/openshift3/registry-console:v3.7

 

10.修改hosts文件

/etc/ansible/hosts文件

[OSEv3:children]
masters
nodes
etcd
nfs

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=openshift-enterprise
openshift_release=v3.7

osm_use_cockpit=true
osm_cockpit_plugins=['cockpit-kubernetes']
openshift_cockpit_deployer_prefix='openshift3/'
openshift_cockpit_deployer_version='v3.7.64'

osm_cluster_network_cidr=10.128.0.0/14
openshift_portal_net=172.30.0.0/16
openshift_master_api_port=8443
openshift_master_console_port=8443


openshift_enable_service_catalog=true
openshift_service_catalog_image_prefix=registry.example.com/openshift3/ose-
openshift_service_catalog_image_version=v3.7.64
ansible_service_broker_image_prefix=registry.example.com/openshift3/ose-
ansible_service_broker_etcd_image_prefix=registry.example.com/rhel7/
template_service_broker_prefix=registry.example.com/openshift3/
oreg_url=registry.example.com/openshift3/ose-${component}:${version}
openshift_examples_modify_imagestreams=true
openshift_clock_enabled=true

openshift_metrics_storage_kind=nfs
openshift_metrics_install_metrics=true
openshift_metrics_storage_access_modes=['ReadWriteOnce']
openshift_metrics_storage_host=nfs.example.com
openshift_metrics_storage_nfs_directory=/exports
openshift_metrics_storage_volume_name=metrics
openshift_metrics_storage_volume_size=10Gi
openshift_metrics_hawkular_hostname=hawkular-metrics.apps.example.com
openshift_metrics_cassandra_storage_type=emptydir
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_hosted_metrics_deploy=true
openshift_hosted_metrics_public_url=https://hawkular-metrics.apps.example.com/hawkular/metrics
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_metrics_image_version=v3.7.64


openshift_template_service_broker_namespaces=['openshift']
template_service_broker_selector={"node": "true"}
openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}]
# Default login account: admin / handhand
openshift_master_htpasswd_users={'admin': '$apr1$gfaL16Jf$c.5LAvg3xNDVQTkk6HpGB1'}


#openshift_repos_enable_testing=true
openshift_disable_check=docker_image_availability,disk_availability,memory_availability,docker_storage
docker_selinux_enabled=false
openshift_docker_options=" --selinux-enabled --insecure-registry 172.30.0.0/16 --log-driver json-file --log-opt max-size=50M --log-opt max-file=3 --insecure-registry registry.example.com --add-registry registry.example.com"
openshift_docker_additional_registries=registry.example.com
openshift_docker_insecure_registries=registry.example.com
osm_etcd_image=rhel7/etcd
openshift_logging_image_prefix=registry.example.com/openshift3/

openshift_hosted_router_selector='region=infra,router=true'
openshift_master_default_subdomain=apps.example.com


# host group for masters
[masters]
master.example.com
# host group for etcd
[etcd]
master.example.com

# host group for nodes, includes region info
[nodes]
master.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node1.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node2.example.com openshift_node_labels="{'region': 'infra', 'zone': 'default'}" openshift_schedulable=true

[nfs]
nfs.example.com

 

11.升级前的检查

ansible-playbook --tags pre_upgrade /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade.yml 

遇到一个问题:检查报告 master、node1、node2 节点的 Docker 版本需要 1.12 以上,但三个节点的 docker 版本实际已经是 1.13.1。原因是 /etc/yum.conf 中的 exclude 配置把 docker*1.13* 等版本的包排除在外,导致检查时 yum 找不到满足要求的 docker 版本。

解决办法:修改 /etc/yum.conf 文件,注释掉下面这行 exclude 配置,使 docker 包不再被排除,即可通过检查

#exclude= docker*1.20*  docker*1.19*  docker*1.18*  docker*1.17*  docker*1.16*  docker*1.15*  docker*1.14*  docker*1.13* 

 

12.升级

升级master节点

ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_control_plane.yml | tee /tmp/upgrade_control_plane_to_3_7.log;

升级完后显示

TASK [Warn if shared-resource-viewer could not be updated] **************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml:134
skipping: [master.example.com] => {"changed": false, "skip_reason": "Conditional result was False"}
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost                  : ok=26   changed=0    unreachable=0    failed=0   
master.example.com         : ok=410  changed=87   unreachable=0    failed=0   
nfs.example.com            : ok=33   changed=2    unreachable=0    failed=0   
node1.example.com          : ok=50   changed=2    unreachable=0    failed=0   
node2.example.com          : ok=50   changed=2    unreachable=0    failed=0   

检查

[root@master ansible]# oc get nodes
NAME                 STATUS    AGE       VERSION
master.example.com   Ready     3h        v1.7.6+a08f5eeb62
node1.example.com    Ready     3h        v1.6.1+5115d708d7
node2.example.com    Ready     3h        v1.6.1+5115d708d7


[root@master ansible]# oc get pods --all-namespaces
NAMESPACE              NAME                         READY     STATUS             RESTARTS   AGE
default                docker-registry-2-94zld      1/1       Running            0          6m
default                registry-console-2-lmhgx     1/1       Running            0          6m
default                router-2-8hnmz               1/1       Running            0          6m
default                router-2-g6tlm               1/1       Running            0          5m
kube-service-catalog   apiserver-z6nmz              1/1       Running            4          2h
kube-service-catalog   controller-manager-d2jgc     0/1       CrashLoopBackOff   9          2h
openshift-infra        hawkular-cassandra-1-gp4n9   1/1       Running            0          10m
openshift-infra        hawkular-metrics-4j828       1/1       Running            1          2h
openshift-infra        heapster-rgwrw               1/1       Running            6          3h


[root@master ansible]# oc version
oc v3.7.64
kubernetes v1.7.6+a08f5eeb62
features: Basic-Auth GSSAPI Kerberos SPNEGO

Server https://master.example.com:8443
openshift v3.7.64
kubernetes v1.7.6+a08f5eeb62

 

升级node节点

ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_nodes.yml -e openshift_upgrade_nodes_serial=1 | tee /tmp/upgrade_node_to_3_7.log;

升级完成后提示

TASK [include] **********************************************************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml:83
skipping: [node2.example.com] => {"changed": false, "skip_reason": "Conditional result was False"}
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost                  : ok=12   changed=0    unreachable=0    failed=0   
master.example.com         : ok=76   changed=4    unreachable=0    failed=0   
nfs.example.com            : ok=33   changed=2    unreachable=0    failed=0   
node1.example.com          : ok=174  changed=30   unreachable=0    failed=0   
node2.example.com          : ok=174  changed=30   unreachable=0    failed=0   

检查

[root@master ansible]# oc get nodes
NAME                 STATUS    AGE       VERSION
master.example.com   Ready     3h        v1.7.6+a08f5eeb62
node1.example.com    Ready     3h        v1.7.6+a08f5eeb62
node2.example.com    Ready     3h        v1.7.6+a08f5eeb62

[root@master ansible]# oc get pods --all-namespaces
NAMESPACE              NAME                         READY     STATUS    RESTARTS   AGE
default                docker-registry-2-x7sqf      1/1       Running   0          4m
default                registry-console-2-mslwb     1/1       Running   0          4m
default                router-2-79qjr               1/1       Running   0          8m
default                router-2-8hnmz               1/1       Running   0          20m
kube-service-catalog   apiserver-z6nmz              1/1       Running   4          3h
kube-service-catalog   controller-manager-d2jgc     0/1       Error     12         2h
openshift-infra        hawkular-cassandra-1-qg2bj   1/1       Running   0          8m
openshift-infra        hawkular-metrics-bqqm8       0/1       Running   0          4m
openshift-infra        heapster-zt87w               1/1       Running   0          8m

 

posted @ 2018-11-20 10:29  ericnie  阅读(739)  评论(0编辑  收藏  举报