HPL手动安装
#!/bin/bash
#
# Installs the toolchain needed to run HPL on an aarch64 cluster and builds
# HPL itself:
#   1. BiSheng compiler  2. Hyper-MPI  3. KML (BLAS)  4. OSU micro-benchmarks
#   5. HPL, plus a generated cluster launch script.
#
# All source archives listed below must sit in the same directory as this
# script. Everything is installed under ${inst_dir}. The script calls yum and
# rpm, so it is expected to be run with sufficient privileges.

# Print an error message to stderr and abort the installation.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Absolute directory containing this script (and the source archives).
CDDIR="$(cd "$(dirname "$0")" && pwd -P)" || die "cannot resolve script directory"

# Source archives.
BISHENG="BiSheng-compiler-2.5.0.1-aarch64-linux.tar.gz"
HYPERMPI="Hyper-MPI_1.2.1_Sources.tar.gz"
KML="BoostKit-kml_1.7.0.zip"
OSU="osu-micro-benchmarks-5.9.tar.gz"
HPL="hpl-2.3.tar.gz"

# Installation root and per-component directories.
inst_dir="/share/benchmark"
MODULE_HOME="${inst_dir}/modulefiles"
BISHENG_HOME="${inst_dir}/compilers/bisheng/2.5.0"
HPL_HOME="${inst_dir}/hpl"
HMPI_PREFIX="${inst_dir}/mpi/hmpi/1.2.1/bisheng2.5.0"
HMPI_HOME="${HMPI_PREFIX}/hmpi"
KML_HOME="${inst_dir}/kml/1.7.0"
OSU_HOME="${inst_dir}/osu"

mkdir -p "${MODULE_HOME}" "${BISHENG_HOME}" "${HPL_HOME}" "${HMPI_HOME}" \
  "${KML_HOME}" "${OSU_HOME}" || die "cannot create installation directories"

# ---------------------------------------------------------------------------
# Build dependencies
# ---------------------------------------------------------------------------
# The original command line contained a stray second "install" (treated as a
# package name) and listed every package twice. stderr is left visible so a
# failed installation is not silently ignored.
yum install -y \
  autoconf automake libtool glibc-devel.aarch64 \
  gcc gcc-c++.aarch64 flex numactl binutils systemd-devel \
  valgrind perl-Data-Dumper environment-modules libatomic \
  >/dev/null || die "yum could not install the build dependencies"

# The "module" command is provided by environment-modules, which is installed
# by the step above, so it can only be initialised reliably from here on.
. /etc/profile.d/modules.sh || die "cannot initialise environment modules"

# ---------------------------------------------------------------------------
# BiSheng compiler
# ---------------------------------------------------------------------------
# The archive is unpacked with --strip 1 straight into ${BISHENG_HOME}, so
# bin/, include/ and lib/ live directly below the module's prefix.
cat >"${BISHENG_HOME}/bisheng_modulefile" <<EOF
#%Module
set version 2.5.0
set prefix ${BISHENG_HOME}
setenv CC clang
setenv CXX clang++
setenv FC flang
prepend-path PATH \${prefix}/bin
prepend-path INCLUDE \${prefix}/include
prepend-path LD_LIBRARY_PATH \${prefix}/lib
EOF
# -f keeps the script re-runnable when the link already exists.
ln -sf "${BISHENG_HOME}/bisheng_modulefile" "${MODULE_HOME}/"

tar --no-same-owner -xf "${CDDIR}/${BISHENG}" --strip 1 -C "${BISHENG_HOME}" \
  || die "cannot unpack ${BISHENG}"
# Make owner/group-only files world readable (and world executable).
find "${BISHENG_HOME}/" -type f -perm 440 -exec chmod 444 {} +
find "${BISHENG_HOME}/" -type f -perm 550 -exec chmod 555 {} +

module load "${BISHENG_HOME}/bisheng_modulefile"
clang -v

# ---------------------------------------------------------------------------
# Hyper-MPI
# ---------------------------------------------------------------------------
cat >"${HMPI_HOME}/hmpi_modulefile" <<EOF
#%Module
set version 1.2.1
set prefix ${HMPI_PREFIX}
setenv MPI_DIR \${prefix}/hmpi
setenv MPI_INC \${prefix}/hmpi/include
setenv MPI_LIB \${prefix}/hmpi/lib
setenv OPAL_PREFIX \${prefix}/hmpi/
prepend-path PATH \${prefix}/hmpi/bin
prepend-path INCLUDE \${prefix}/hmpi/include
prepend-path LD_LIBRARY_PATH \${prefix}/hmpi/lib
prepend-path PATH \${prefix}/hucx/bin
prepend-path INCLUDE \${prefix}/hucx/include
prepend-path LD_LIBRARY_PATH \${prefix}/hucx/lib
EOF

mkdir -p "${CDDIR}/hmpi/"
tar --no-same-owner -xf "${CDDIR}/${HYPERMPI}" --strip 1 -C "${CDDIR}/hmpi/" \
  || die "cannot unpack ${HYPERMPI}"
cd "${CDDIR}/hmpi/" || die "cannot enter ${CDDIR}/hmpi"
sh hmpi-autobuild.sh -c clang -t release \
  -m hmpi.tar.gz -u hucx.tar.gz -g xucg.tar.gz \
  -p "${HMPI_PREFIX}" || die "Hyper-MPI build failed"
ln -sf "${HMPI_HOME}/hmpi_modulefile" "${MODULE_HOME}/"

# Leave the build tree before deleting it; otherwise the shell is left in a
# removed working directory and every later relative path breaks.
cd "${CDDIR}" || die "cannot enter ${CDDIR}"
rm -rf -- "${CDDIR:?}/hmpi"

module load "${HMPI_HOME}/hmpi_modulefile"
mpicc -v

# ---------------------------------------------------------------------------
# KML
# ---------------------------------------------------------------------------
# Absolute paths: the archive and the rpm it contains live in ${CDDIR}.
# -o avoids an interactive overwrite prompt on a re-run.
unzip -o "${CDDIR}/${KML}" -d "${CDDIR}" || die "cannot unpack ${KML}"
rpm --force -ivh "${CDDIR}/boostkit-kml-1.7.0-1.aarch64.rpm" --nodeps \
  || die "cannot install the KML rpm"
# Relocate the rpm payload from its default location into the shared tree.
mv /usr/local/kml/* "${KML_HOME}/"

cat >"${KML_HOME}/kml_modulefile" <<EOF
#%Module
set version 1.7.0
set prefix ${KML_HOME}
prepend-path LD_LIBRARY_PATH \${prefix}/lib/kblas/omp
EOF
ln -sf "${KML_HOME}/kml_modulefile" "${MODULE_HOME}/"
module load "${KML_HOME}/kml_modulefile"

# ---------------------------------------------------------------------------
# OSU micro-benchmarks
# ---------------------------------------------------------------------------
# -C makes the unpack location match the directory entered just below.
tar --no-same-owner -xf "${CDDIR}/${OSU}" -C "${CDDIR}" \
  || die "cannot unpack ${OSU}"
cd "${CDDIR}/osu-micro-benchmarks-5.9" || die "cannot enter the OSU source tree"
./configure --prefix="${OSU_HOME}" \
  CC="$(command -v mpicc)" CXX="$(command -v mpicxx)" \
  || die "OSU configure failed"
make || die "OSU build failed"
make install || die "OSU install failed"
cd "${CDDIR}" || die "cannot enter ${CDDIR}"
rm -rf -- "${CDDIR:?}/osu-micro-benchmarks-5.9"

# Sample launch commands for the individual OSU tests. Host names and the
# network device are site specific and must be adapted before use.
cat >"${OSU_HOME}/osu.sh" <<EOF
#1.测试项BW
mpirun --allow-run-as-root -oversubscribe -np 2 -N 1 \\
--host gpu01,gpu02 \\
-mca pml ucx -mca btl ^vader,tcp,openib,uct -mca io romio321 \\
-x UCX_TLS=self,sm,rc \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
${OSU_HOME}/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bw
#2.测试项Latency
mpirun --allow-run-as-root -oversubscribe -np 2 -N 1 \\
--host gpu01,gpu02 -mca pml ucx -mca btl ^vader,tcp,openib,uct -mca io romio321 \\
-x UCX_TLS=self,sm,rc \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
${OSU_HOME}/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_latency
#3.测试项Bcast
mpirun --allow-run-as-root -oversubscribe -np 2 -N 1 \\
--host gpu01,gpu02 -mca pml ucx -mca btl ^vader,tcp,openib,uct -mca io romio321 \\
-x UCX_TLS=self,sm,rc \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
${OSU_HOME}/libexec/osu-micro-benchmarks/mpi/collective/osu_bcast
#4.测试项allreduce
mpirun --allow-run-as-root -oversubscribe -np 2 -N 1 \\
--host gpu01,gpu02 \\
-mca pml ucx -mca btl ^vader,tcp,openib,uct -mca io romio321 \\
-x UCX_TLS=self,sm,rc \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
${OSU_HOME}/libexec/osu-micro-benchmarks/mpi/collective/osu_allreduce
#5.测试项alltoall
mpirun --allow-run-as-root -oversubscribe -np 2 -N 1 \\
--host gpu01,gpu02 \\
-mca pml ucx -mca btl ^vader,tcp,openib,uct -mca io romio321 \\
-x UCX_TLS=self,sm,rc \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
${OSU_HOME}/libexec/osu-micro-benchmarks/mpi/collective/osu_alltoall
EOF
chmod +x "${OSU_HOME}/osu.sh"

# ---------------------------------------------------------------------------
# HPL
# ---------------------------------------------------------------------------
tar --no-same-owner -xf "${CDDIR}/${HPL}" --strip 1 -C "${HPL_HOME}" \
  || die "cannot unpack ${HPL}"

# Architecture makefile consumed by "make arch=arm". Paths are expanded now;
# make variables are escaped so that they reach the file as \$(NAME).
cat >"${HPL_HOME}/Make.arm" <<EOF
SHELL = /bin/sh
CD = cd
CP = cp
LN_S = ln -s
MKDIR = mkdir
RM = /bin/rm -f
TOUCH = touch
ARCH = arm
TOPdir = ${HPL_HOME}
INCdir = \$(TOPdir)/include
BINdir = \$(TOPdir)/bin/\$(ARCH)
LIBdir = \$(TOPdir)/lib/\$(ARCH)
HPLlib = \$(LIBdir)/libhpl.a
MPdir = ${HMPI_HOME}
MPinc = -I\$(MPdir)/include
MPlib = -L\$(MPdir)/lib/ -lmpi
LAdir =
LAinc =
LAlib = -L${KML_HOME}/lib/kblas/omp/ -lkblas
F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I\$(INCdir) -I\$(INCdir)/\$(ARCH) \$(LAinc) \$(MPinc)
HPL_LIBS = \$(HPLlib) \$(LAlib) \$(MPlib)
HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
HPL_DEFS = \$(F2CDEFS) \$(HPL_OPTS) \$(HPL_INCLUDES)
CC = clang
CCNOOPT = \$(HPL_DEFS)
CCFLAGS = \$(HPL_DEFS) -fomit-frame-pointer -Ofast -ffast-math -ftree-vectorize -mcpu=tsv110 -funroll-loops -W -Wall -fopenmp
LINKER = flang
LINKFLAGS = \$(CCFLAGS)
ARCHIVER = ar
ARFLAGS = r
RANLIB = echo
EOF

cd "${HPL_HOME}" || die "cannot enter ${HPL_HOME}"
make arch=arm || die "HPL build failed"

# Cluster launch script. It expects a "hostfile" next to xhpl with one node
# per line; the node count is only used to name the log file.
# OMP_NUM_THREADS was misspelled as OM_NUM_THREADS, which left the OpenMP
# thread count at its default instead of matching pe=4.
cat >"${HPL_HOME}/bin/arm/run_hpl_cluster_test.sh" <<EOF
#!/bin/bash
. /etc/profile.d/modules.sh
module load ${BISHENG_HOME}/bisheng_modulefile
module load ${HMPI_HOME}/hmpi_modulefile
module load ${KML_HOME}/kml_modulefile
TIME="\$(date +%F_%T)"
HOSTFILE="${HPL_HOME}/bin/arm/hostfile"
NODE_SUM="\$(cat \$HOSTFILE| wc -l)"
mpirun --allow-run-as-root \\
-x LD_LIBRARY_PATH -x PATH -x PWD \\
-map-by ppr:40:node:pe=4 -x OMP_NUM_THREADS=4 \\
-hostfile \$HOSTFILE \\
-mca pml ucx \\
-mca btl ^vader,tcp,openib,uct \\
-mca io romio321 \\
-x UCX_TLS=self,sm,ud \\
-x UCX_NET_DEVICES=mlx5_0:1 \\
-x OMP_PLACES=cores -x OMP_WAIT_POLICY=active -x OMP_PROC_BIND=close \\
-x UCX_REQ_TIMEOUT=30000s \\
${HPL_HOME}/bin/arm/xhpl | tee hpl_\${NODE_SUM}node_\${TIME}.log
EOF
chmod +x "${HPL_HOME}/bin/arm/run_hpl_cluster_test.sh"