OCP升级(3.7 -> 3.9)

 坑多,搞了好多次。

1. /etc/ansible/hosts 配置如下

[OSEv3:children]
masters
nodes
etcd
nfs

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=openshift-enterprise
openshift_release=v3.9

osm_use_cockpit=true
osm_cockpit_plugins=['cockpit-kubernetes']
openshift_cockpit_deployer_prefix='openshift3/'
openshift_cockpit_deployer_version='v3.9.43'

osm_cluster_network_cidr=10.128.0.0/14
openshift_portal_net=172.30.0.0/16
openshift_master_api_port=8443
openshift_master_console_port=8443

openshift_hosted_registry_storage_kind=nfs
openshift_hosted_registry_storage_access_modes=['ReadWriteMany']
openshift_hosted_registry_storage_nfs_directory=/exports
openshift_hosted_registry_storage_nfs_options='*(rw,root_squash)'
openshift_hosted_registry_storage_volume_name=registry
openshift_hosted_registry_storage_volume_size=10Gi
oreg_url=registry.example.com/openshift3/ose-\${component}:\${version}
openshift_docker_additional_registries=registry.example.com
openshift_docker_insecure_registries=registry.example.com
openshift_docker_blocked_registries=registry.access.redhat.com,docker.io
openshift_image_tag=v3.9.43

openshift_enable_service_catalog=true
openshift_service_catalog_image_prefix=registry.example.com/openshift3/ose-
openshift_service_catalog_image_version=v3.9.43

ansible_service_broker_image_prefix=registry.example.com/openshift3/ose-
ansible_service_broker_etcd_image_prefix=registry.example.com/rhel7/
ansible_service_broker_selector={"region": "infra"}

openshift_template_service_broker_namespaces=['openshift']
template_service_broker_selector={"region": "infra"}
template_service_broker_prefix=registry.example.com/openshift3/ose-

openshift_hosted_manage_registry=false

oreg_url=registry.example.com/openshift3/ose-${component}:${version}
openshift_examples_modify_imagestreams=true
openshift_clock_enabled=true

openshift_metrics_storage_kind=nfs
openshift_metrics_install_metrics=true
openshift_metrics_storage_access_modes=['ReadWriteOnce']
openshift_metrics_storage_host=nfs.example.com
openshift_metrics_storage_nfs_directory=/exports
openshift_metrics_storage_volume_name=metrics
openshift_metrics_storage_volume_size=10Gi
openshift_metrics_hawkular_hostname=hawkular-metrics.apps.example.com
#openshift_metrics_cassandra_storage_type=emptydir
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_hosted_metrics_deploy=true
openshift_hosted_metrics_public_url=https://hawkular-metrics.apps.example.com/hawkular/metrics
openshift_metrics_image_version=v3.9.43

openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}]
# Default login account: admin / handhand
openshift_master_htpasswd_users={'admin': '$apr1$gfaL16Jf$c.5LAvg3xNDVQTkk6HpGB1'}


#openshift_repos_enable_testing=true
openshift_disable_check=docker_image_availability,disk_availability,memory_availability,docker_storage

docker_selinux_enabled=false
openshift_docker_options=" --selinux-enabled --insecure-registry 172.30.0.0/16 --log-driver json-file --log-opt max-size=50M --log-opt max-file=3 --insecure-registry registry.example.com --add-registry registry.example.com"
osm_etcd_image=rhel7/etcd
openshift_logging_image_prefix=registry.example.com/openshift3/

openshift_hosted_router_selector='region=infra,router=true'
openshift_master_default_subdomain=app.example.com

openshift_web_console_prefix=registry.example.com/openshift3/ose-
openshift_web_console_version=v3.9.43

# host group for masters
[masters]
master.example.com
# host group for etcd
[etcd]
master.example.com

# host group for nodes, includes region info
[nodes]
master.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node1.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node2.example.com openshift_node_labels="{'region': 'infra', 'zone': 'default', 'node': 'true'}" openshift_schedulable=true

[nfs]
nfs.example.com

 

2. 有几个镜像需要 retag(拉取后重新打标签,再推送到私有仓库)

docker pull registry.example.com/openshift3/registry-console:v3.9.43 
docker tag registry.example.com/openshift3/registry-console:v3.9.43 registry.example.com/openshift3/registry-console:v3.9
docker push registry.example.com/openshift3/registry-console:v3.9


docker pull  registry.example.com/openshift3/ose-deployer:v3.9.43
docker tag registry.example.com/openshift3/ose-deployer:v3.9.43 registry.example.com/openshift3/ose-deployer:v3.9.51
docker push registry.example.com/openshift3/ose-deployer:v3.9.51

docker pull  registry.example.com/openshift3/ose-pod:v3.9.43
docker tag registry.example.com/openshift3/ose-pod:v3.9.43 registry.example.com/openshift3/ose-pod:v3.9.51
docker push registry.example.com/openshift3/ose-pod:v3.9.51

 

更新主节点

ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_9/upgrade_control_plane.yml | tee /tmp/upgrade_control_plane_to_3_9.log;

 

完成后状态

TASK [openshift_master : Wait for master API to come back online] *******************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_master/tasks/restart.yml:6
ok: [master.example.com] => {"changed": false, "elapsed": 10, "failed": false, "path": null, "port": 8443, "search_regex": null, "state": "started"}

TASK [openshift_master : restart master controllers] ********************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_master/tasks/restart.yml:14
changed: [master.example.com] => {"attempts": 1, "changed": true, "cmd": ["systemctl", "restart", "atomic-openshift-master-controllers"], "delta": "0:00:00.738269", "end": "2018-11-24 21:47:24.938854", "failed": false, "rc": 0, "start": "2018-11-24 21:47:24.200585", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost                  : ok=28   changed=0    unreachable=0    failed=0   
master.example.com         : ok=798  changed=197  unreachable=0    failed=0   
nfs.example.com            : ok=1    changed=0    unreachable=0    failed=0   


[root@master ~]# oc get pods --all-namespaces
NAMESPACE                           NAME                          READY     STATUS             RESTARTS   AGE
default                             docker-registry-2-8kc4s       1/1       Running            0          16m
default                             docker-registry-2-qh9vq       1/1       Running            0          16m
default                             docker-registry-2-xdz55       1/1       Running            2          3h
default                             registry-console-2-qtj4j      1/1       Running            0          16m
default                             router-4-ctlwd                1/1       Running            0          7m
default                             router-4-kvbc6                1/1       Running            0          6m
kube-service-catalog                apiserver-bp4j4               1/1       Running            0          3m
kube-service-catalog                controller-manager-m82nr      0/1       CrashLoopBackOff   4          3m
openshift-ansible-service-broker    asb-1-deploy                  0/1       Error              0          2m
openshift-ansible-service-broker    asb-etcd-1-deploy             0/1       Error              0          2m
openshift-infra                     hawkular-cassandra-1-6qmm9    1/1       Running            2          3h
openshift-infra                     hawkular-metrics-fmj5n        0/1       CrashLoopBackOff   38         3h
openshift-infra                     heapster-8cb76                0/1       Error              1          16m
openshift-template-service-broker   apiserver-7gnvj               0/1       Error              3          2m
openshift-template-service-broker   apiserver-kqqx7               1/1       Running            0          2m
openshift-template-service-broker   apiserver-smzqn               0/1       Error              3          2m
openshift-web-console               webconsole-55d596f44d-n6gf8   1/1       Running            0          9m

 

[root@master ~]# oc get node
NAME                 STATUS    ROLES     AGE       VERSION
master.example.com   Ready     master    19h       v1.9.1+a0ce1bc657
node1.example.com    Ready     <none>    19h       v1.7.6+a08f5eeb62
node2.example.com    Ready     <none>    19h       v1.7.6+a08f5eeb62

 

更新node节点

ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_9/upgrade_nodes.yml -e openshift_upgrade_nodes_serial=1 | tee /tmp/upgrade_node_to_3_9.log;

 

任务结束后输出

TASK [openshift_excluder : Enable openshift excluder] *******************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_excluder/tasks/exclude.yml:24
changed: [node1.example.com] => {"changed": true, "cmd": ["/sbin/atomic-openshift-excluder", "exclude"], "delta": "0:00:00.049623", "end": "2018-11-25 09:04:05.773310", "failed": false, "rc": 0, "start": "2018-11-25 09:04:05.723687", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
changed: [node2.example.com] => {"changed": true, "cmd": ["/sbin/atomic-openshift-excluder", "exclude"], "delta": "0:00:00.051837", "end": "2018-11-25 09:04:05.158001", "failed": false, "rc": 0, "start": "2018-11-25 09:04:05.106164", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
META: ran handlers
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost                  : ok=12   changed=0    unreachable=0    failed=0   
master.example.com         : ok=76   changed=4    unreachable=0    failed=0   
nfs.example.com            : ok=28   changed=2    unreachable=0    failed=0   
node1.example.com          : ok=158  changed=45   unreachable=0    failed=0   
node2.example.com          : ok=158  changed=46   unreachable=0    failed=0   

 

[root@master ~]# oc get nodes
NAME                 STATUS    ROLES     AGE       VERSION
master.example.com   Ready     master    12h       v1.9.1+a0ce1bc657
node1.example.com    Ready     <none>    12h       v1.9.1+a0ce1bc657
node2.example.com    Ready     <none>    12h       v1.9.1+a0ce1bc657

 

heapster、hawkular-metrics 等 metrics 相关的 pod 为什么不见了,还需要去查

[root@master ~]# oc get pods --all-namespaces
NAMESPACE                           NAME                          READY     STATUS             RESTARTS   AGE
default                             router-4-kvbc6                1/1       Running            0          18m
kube-service-catalog                apiserver-bp4j4               1/1       Running            0          15m
kube-service-catalog                controller-manager-m82nr      0/1       CrashLoopBackOff   7          15m
openshift-ansible-service-broker    asb-1-deploy                  0/1       Error              0          14m
openshift-ansible-service-broker    asb-etcd-1-deploy             0/1       Error              0          14m
openshift-template-service-broker   apiserver-7gnvj               1/1       Running            7          14m
openshift-template-service-broker   apiserver-kqqx7               1/1       Running            0          14m
openshift-template-service-broker   apiserver-smzqn               1/1       Running            7          14m
openshift-web-console               webconsole-55d596f44d-n6gf8   1/1       Running            0          21m

 

更新脚本不要反复执行。遇到的问题包括:

 

  • 导入模板失败
TASK [openshift_examples : Import RHEL streams] *************************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_examples/tasks/main.yml:58
FAILED - RETRYING: Import RHEL streams (3 retries left).
FAILED - RETRYING: Import RHEL streams (2 retries left).
FAILED - RETRYING: Import RHEL streams (1 retries left).

导入模板失败,目前暂时没有处理

 

  • RETRYING: Poll for OpenShift pod deployment success
TASK [openshift_hosted : Poll for OpenShift pod deployment success] *****************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_hosted/tasks/wait_for_pod.yml:23
FAILED - RETRYING: Poll for OpenShift pod deployment success (60 retries left).
FAILED - RETRYING: Poll for OpenShift pod deployment success (59 retries left).

看了一下,这是 docker-registry 部署完成后的检查步骤。解决办法:修改 hosts 文件,加入

openshift_hosted_manage_registry=false

 

  • 验证TSB是否运行
TASK [template_service_broker : Verify that TSB is running] ********************************************************************************
FAILED - RETRYING: Verify that TSB is running (120 retries left).
FAILED - RETRYING: Verify that TSB is running (119 retries left).

解决办法:修改 hosts 文件,让 template service broker 在 infra 节点上运行(之前是在 node=true 的节点上):

template_service_broker_selector={"region": "infra"}

 

  • upgrade storage

脚本不能反复执行

 

 

posted @ 2018-11-25 08:19  ericnie  阅读(1034)  评论(0)  编辑  收藏  举报