监控 PBS 作业运行状况
# 监控内存使用情况
# Polls a PBS job with `qstat -f` and appends its resources_used.mem line to
# <workdir>/pbs/pbs.job.<job_id>.mem_used.logs until the job is no longer
# reported by the server.
# Usage: <script> [job_id] [workdir]   (both default to the values below)
job_id=${1:-163997}
workdir=${2:-/share_bio/}

#######################################
# Record the memory usage of one PBS job over time.
# Arguments:
#   $1 - PBS job id
#   $2 - base directory; the log is written under <dir>/pbs/
#   $3 - seconds to wait between polls (default: 60)
# Outputs:
#   Appends to <dir>/pbs/pbs.job.<id>.mem_used.logs; diagnostics go to stderr.
# Returns:
#   0 once qstat stops reporting the job, 1 if the log cannot be written.
#######################################
monitor_job_mem() {
  local id=$1
  local base=${2%/}            # drop one trailing slash so the path has no "//"
  local interval=${3:-60}
  local log_dir="$base/pbs"
  local log_file="$log_dir/pbs.job.$id.mem_used.logs"
  # Reports shorter than this are treated as "job gone" (threshold kept from
  # the original heuristic).
  local -r min_report_len=100
  local report

  # Fail early instead of polling forever with nowhere to write the samples.
  if ! mkdir -p -- "$log_dir"; then
    printf 'cannot create log directory: %s\n' "$log_dir" >&2
    return 1
  fi

  # NOTE(review): this header text does not describe memory samples; it is
  # kept unchanged in case something downstream reads the log -- confirm.
  if ! printf '%s\n' "population_sizes" >> "$log_file"; then
    printf 'cannot write to log file: %s\n' "$log_file" >&2
    return 1
  fi

  while true; do
    # Query once per poll and reuse the captured report for both the liveness
    # check and the grep, so the job cannot vanish between two qstat calls.
    # Stop when qstat fails or prints (almost) nothing on stdout.
    if ! report=$(qstat -f "$id") || (( ${#report} < min_report_len )); then
      break
    fi
    # -F: match the field name literally. No match (e.g. the job has not
    # reported usage yet) simply logs nothing for this poll.
    grep -F 'resources_used.mem' <<<"$report" >> "$log_file"
    sleep "$interval"
  done
  return 0
}

monitor_job_mem "$job_id" "$workdir"
# 监控 CPU 使用情况:用 tracejob 查看作业的历史记录(-n 365 表示回溯最近 365 天的日志),输出中的 resources_used.cput 即作业消耗的 CPU 时间
tracejob -n 365 2222
Job: 2222.centos64
07/30/2014 18:50:48 S enqueuing into batch, state 1 hop 1
07/30/2014 18:50:48 S Job Modified at request of root@centos64
07/30/2014 18:50:48 L Job Run
07/30/2014 18:50:48 S Job Run at request of root@centos64
07/30/2014 18:50:48 S Not sending email: User does not want mail of this type.
07/30/2014 18:50:48 A queue=batch
07/30/2014 18:50:48 A user=aimin group=aimin jobname=cow5utr01 queue=batch ctime=1406717448 qtime=1406717448 etime=1406717448 start=1406717448 owner=aimin@centos64 exec_host=centos64/1 Resource_List.neednodes=1:ppn=1
Resource_List.nodect=1 Resource_List.nodes=1:ppn=1 Resource_List.walltime=1440:00:00
07/30/2014 19:15:14 M scan_for_terminated: job 2222.centos64 task 1 terminated, sid=3480
07/30/2014 19:15:14 M job was terminated
07/30/2014 19:15:15 S Exit_status=0 resources_used.cput=00:24:21 resources_used.mem=9708kb resources_used.vmem=254684kb resources_used.walltime=00:24:26
07/30/2014 19:15:15 S Not sending email: User does not want mail of this type.
07/30/2014 19:15:15 M obit sent to server
07/30/2014 19:15:15 S on_job_exit valid pjob: 2222.centos64 (substate=50)
07/30/2014 19:15:15 A user=aimin group=aimin jobname=cow5utr01 queue=batch ctime=1406717448 qtime=1406717448 etime=1406717448 start=1406717448 owner=aimin@centos64 exec_host=centos64/1 Resource_List.neednodes=1:ppn=1
Resource_List.nodect=1 Resource_List.nodes=1:ppn=1 Resource_List.walltime=1440:00:00 session=3480 end=1406718915 Exit_status=0 resources_used.cput=00:24:21 resources_used.mem=9708kb resources_used.vmem=254684kb
resources_used.walltime=00:24:26
07/30/2014 19:15:20 M removed job script
07/30/2014 19:20:23 S dequeuing from batch, state COMPLETE