Linux下查找最耗IO的进程
很多时候发现linux系统输入一些命令很慢,用top查看IOwait占用CPU很高,top下面列出的进程中,不论按cpu排序、内存排序、时间排序,都看不出来到底哪个进程(哪个分区)占用iowait最高。
Waiting
|
那么到底怎么知道是哪个进程导致iowait过高?
系统日志是没有记录这些内容的,但是内核中有相应的方式。Linux 内核里提供了一个 block_dump 参数用来把 block 读写(WRITE/READ)状况 dump 到日志里,这样可以通过 dmesg 命令来查看。
看一下介绍
|
关于这个统计工具,在网上只找到了一个用Perl写的,但是有的环境并没有Perl解释器,以下是我用busybox shell实现的一个同样的程序:
#!/bin/sh
# iodump - per-process block I/O statistics built on the kernel block_dump log.
#
# Usage:
#   iodump        collect records until interrupted, then print the top 10 tasks
#   iodump -a     same, but print every task
#   iodump -h     show help
#
# Reference:
#   http://www.kernel.org/doc/Documentation/laptops/laptop-mode.txt
#   http://www.zarafa.com/wiki/index.php/Monitoring_Disk_IO_per_process

case "${1:-}" in
  -h | --help)
    echo "iodump - Compute per-PID I/O stats for Linux when iotop/pidstat/iopp are not available."
    echo "Run $0 -a :show all list; The default top 10"
    echo "Run $0 -h :show help"
    echo "Run $0 and wait a moment and CTRL-C to kill stop it."
    exit 0
    ;;
esac

# Maximum number of report rows; -a lifts the limit to "effectively all".
top=10
if [ "${1:-}" = "-a" ]; then
  top=32768
fi

# Field separator as it was at start-up, kept so later code can restore it.
def=$IFS

# Scratch file holding one "task pid activity where dev" record per line.
# mktemp gives an unpredictable name instead of /tmp/<epoch>.log.
f=$(mktemp "${TMPDIR:-/tmp}/iodump.XXXXXX") || {
  echo "$0: cannot create temporary file" >&2
  exit 1
}

#######################################
# Stop block_dump logging, print the per-task report built from "$f",
# remove the scratch file and exit. Used as the signal handler, so it
# never returns to the caller.
# Globals:
#   f     (read)  scratch file with space-separated records; deleted here
#   top   (read)  maximum number of rows to print
#   start (read)  epoch seconds when sampling began; elapsed time is
#                 reported as 0 when it is unset
# Outputs:
#   Header, up to $top rows sorted by TOTAL (descending), "Max task:<n>"
#   with the number of distinct task/pid pairs, and the elapsed time.
#######################################
output() {
  # Switch kernel logging off first so the log stops growing while we report.
  if [ -w /proc/sys/vm/block_dump ]; then
    echo 0 > /proc/sys/vm/block_dump
  fi
  end=$(date +'%s')

  # Plain POSIX awk only (no asort), so busybox awk and mawk work as well.
  awk -v top="$top" '
    BEGIN {
      n = 0
      sep = ","
      fmt = "%-20s%-10s%-10s%-10s%-10s%-10s%-10s\n"
      printf(fmt, "TASK", "PID", "TOTAL", "READ", "WRITE", "DIRTY", "DEVICES")
    }

    {
      task = $1; pid = $2; act = tolower($3); dev = $5

      # Ignore anything that is not a well-formed record.
      if (pid !~ /^[0-9]+$/) next
      if (act != "read" && act != "write" && act != "dirtied") next

      # One aggregate per task/pid pair, remembered in first-seen order.
      key = task SUBSEP pid
      if (!(key in total)) order[++n] = key
      total[key]++
      count[key, act]++

      # Record every device once per task.
      if (dev != "" && !((key, dev) in seen)) {
        seen[key, dev] = 1
        if (key in devices) devices[key] = devices[key] sep dev
        else devices[key] = dev
      }
    }

    END {
      limit = (n < top + 0) ? n : top + 0

      # Partial selection sort: only the first "limit" slots need ordering.
      # Ties keep first-seen order because the comparison is strict.
      for (i = 1; i <= limit; i++) {
        best = i
        for (j = i + 1; j <= n; j++)
          if (total[order[j]] > total[order[best]]) best = j
        tmp = order[i]; order[i] = order[best]; order[best] = tmp

        key = order[i]
        split(key, part, SUBSEP)
        printf(fmt, part[1], part[2], total[key],
               count[key, "read"] + 0, count[key, "write"] + 0,
               count[key, "dirtied"] + 0, devices[key])
      }
      print "Max task:" (n + 0)
    }
  ' "$f"

  rm -f -- "$f"
  echo "Total times $((end - ${start:-$end})) seconds"
  exit
}

# Numeric signal values, kept for code that installs traps by number.
SIGTERM=15
SIGINT=2
# End sampling and print the report on Ctrl-C or kill.
# Signals are given by name because numbers differ between architectures
# (18 is SIGCONT, not SIGTSTP, on x86/ARM Linux).
# TSTP is deliberately not trapped: it would presumably stop the pipeline
# children while this shell is still waiting on them.
trap 'output' INT TERM

#######################################
# Read kernel log lines on stdin and append one record per block_dump
# message to "$f" in the form "task pid activity where dev".
# Recognised messages:
#   comm(pid): READ block N on DEV ...
#   comm(pid): WRITE block N on DEV ...
#   comm(pid): dirtied inode N (...) on DEV
# Every other line is skipped, so an unrelated kernel message can no
# longer cause the previous record to be written a second time.
# Globals:
#   f (read) path of the record file
#######################################
parse1() {
  while IFS= read -r line; do
    case $line in
      *'): READ block '*)
        activity=READ
        marker='): READ block '
        ;;
      *'): WRITE block '*)
        activity=WRITE
        marker='): WRITE block '
        ;;
      *'): dirtied inode '*)
        activity=dirtied
        marker='): dirtied inode '
        ;;
      *)
        continue
        ;;
    esac

    head=${line%%"$marker"*} # "[timestamp] comm(pid"
    tail=${line#*"$marker"}  # "N on DEV ..." or "N (name) on DEV"

    pid=${head##*'('}
    task=${head%'('*}
    where=${tail%%' '*}
    case $pid in '' | *[!0-9]*) continue ;; esac
    case $where in '' | *[!0-9]*) continue ;; esac

    # The device is the first word after the last " on ".
    case $tail in
      *' on '*) dev=${tail##*' on '} ;;
      *) continue ;;
    esac
    dev=${dev%%' '*}
    [ -n "$dev" ] || continue

    # Drop a leading "<prio>" and/or "[timestamp] " that dmesg may print.
    task=${task#'<'[0-9]'>'}
    case $task in
      '['*'] '*) task=${task#*'] '} ;;
    esac
    [ -n "$task" ] || continue

    # Records are space separated, so spaces inside the task name would
    # shift every following field; replace them with underscores.
    while :; do
      case $task in
        *' '*) task=${task%%' '*}_${task#*' '} ;;
        *) break ;;
      esac
    done

    printf '%s %s %s %s %s\n' "$task" "$pid" "$activity" "$where" "$dev" >> "$f"
  done
}

start=$(date +'%s')

# block_dump needs root and is not provided by every kernel; fail early with
# a clear message instead of polling an empty log forever.
if [ ! -w /proc/sys/vm/block_dump ]; then
  echo "$0: cannot write /proc/sys/vm/block_dump (root and kernel block_dump support are required)" >&2
  if [ -n "${f:-}" ]; then
    rm -f -- "$f"
  fi
  exit 1
fi
echo 1 > /proc/sys/vm/block_dump

# Drain the kernel ring buffer once per second (dmesg -c also clears it)
# and feed the lines to the parser until a trapped signal ends the script.
while true; do
  sleep 1
  dmesg -c
done | parse1
参考:
http://www.zarafa.com/wiki/index.php/Monitoring_Disk_IO_per_process