获取服务器分区占用最大的文件

#!/usr/bin/python
#-*- coding: utf-8 -*-
import time
import re
import os
from os.path import join, getsize
from sys import stdout
import getopt,sys
#参数
# Usage text printed for -h/--help and after an option-parsing error.
_PYGREP_USAGE = '''-g\t--grep 设置需要过滤掉的字符,默认不过滤
-e\t--egrep 自定义grep参数,默认为-Ev
-d\t--Disk_usage 设置空间百分比,默认80%
-t\t--The_number 设置显示的条数,all显示全部,默认10条
-o\t--Space_occupied_files 设置已占用空间的百分比显示,默认100%
-k\t--km 查找文件的大小,默认10M
-l\t--lv 指定磁盘,默认根据设置的空间百分比查找磁盘,当使用此参数则-d参数失效'''


def pygrep(argv):
    """Parse the command-line options of the report script.

    Args:
        argv: list of argument strings (without the program name).

    Returns:
        A 7-tuple of raw option strings, in this order:
        (grep, egrep, Disk_usage, The_number, Space_occupied_files, km, lv).
        An option that was not supplied is returned as ''. When an option is
        repeated (in short or long form) the last occurrence wins.

    Exits:
        Prints the usage text and exits with status 2 on an unknown option
        or a missing option value; prints the usage text and exits normally
        for -h / --help.
    """
    fields = ('grep', 'egrep', 'Disk_usage', 'The_number',
              'Space_occupied_files', 'km', 'lv')
    # Both spellings of an option map to the same result slot.
    slot_of = {
        '-g': 'grep', '--grep': 'grep',
        '-e': 'egrep', '--egrep': 'egrep',
        '-d': 'Disk_usage', '--Disk_usage': 'Disk_usage',
        '-t': 'The_number', '--The_number': 'The_number',
        '-o': 'Space_occupied_files',
        '--Space_occupied_files': 'Space_occupied_files',
        '-k': 'km', '--km': 'km',
        '-l': 'lv', '--lv': 'lv',
    }
    values = dict.fromkeys(fields, '')

    try:
        opts, _unused = getopt.getopt(
            argv,
            "hg:e:d:t:o:k:l:",
            ["help", "grep=", "egrep=", "Disk_usage=", "The_number=",
             "Space_occupied_files=", "km=", "lv="],
        )
    except getopt.GetoptError:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(_PYGREP_USAGE)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(_PYGREP_USAGE)
            sys.exit()
        values[slot_of[opt]] = arg

    return tuple(values[name] for name in fields)

#转换
def GBK(dir,rootdisk,The_number):
    """Turn a "<size> <path>" listing into human-readable report pieces.

    Args:
        dir: text with one "<size> <path>" entry per line (a trailing
            newline is optional). Everything after the first space is
            taken as the path, so paths containing spaces survive.
        rootdisk: text used to select the partition's line(s) from
            ``df -h`` (passed to ``grep`` as the pattern).
        The_number: maximum number of entries to keep, or None for no
            limit.

    Returns:
        A tuple ``(df_text, du_text, shown)``: the matching ``df -h``
        line(s), the ``du -h`` output for the kept paths ('' when no path
        was kept), and the number of paths kept.
    """
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    paths = []
    for entry in dir.split('\n'):
        if The_number is not None and len(paths) >= The_number:
            break
        fields = entry.split(' ', 1)
        # Skip blank lines and lines that carry no path.
        if len(fields) < 2 or not fields[1]:
            continue
        paths.append(fields[1])

    du_text = ''
    if paths:
        # Each path is quoted so shell metacharacters in file names cannot
        # split or alter the command. Without paths du is not run at all,
        # because a bare `du -h` would report on the current directory.
        quoted = ' '.join(quote(path) for path in paths)
        du_text = os.popen("du -h -- %s" % quoted).read()

    df_text = os.popen("df -h | grep %s" % quote(str(rootdisk))).read()
    return df_text, du_text, len(paths)

#主要方法
def serverdisk(Disk_usage,km,Space_occupied_files,The_number,lgrep,lv):
    """Report the largest files on each partition that is filling up.

    Partitions are read from ``df -P``. For every selected partition the
    top-level entries of its mount point that live on that same partition
    are searched with ``find`` for files larger than ``km``; the hits are
    sorted by size and handed to ``GBK`` for formatting.

    Args:
        Disk_usage: minimum used percentage (int) for a partition to be
            examined. Ignored (forced to 0) when ``lv`` is given.
        km: size threshold in ``find -size`` syntax, e.g. "10M".
        Space_occupied_files: stop listing once the running total of the
            listed file sizes exceeds this percentage of the partition's
            used space.
        The_number: maximum number of files shown, or None for all.
        lgrep: shell fragment (e.g. "|grep -Ev 'x'") inserted after
            ``find`` to filter its output, or '' for no filter. It is
            inserted verbatim, so the caller is responsible for quoting.
        lv: extended regular expression selecting df lines; '' selects by
            ``Disk_usage`` instead.

    Returns:
        A list of report strings, one per examined partition. If a
        partition yields no usable result the list ends with an
        explanatory message and is returned immediately.
    """
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    report = []

    # List the partitions, dropping the df header line.
    cmd = "df -P | sed '1d' "
    if lv != "":
        cmd = "df -P | sed '1d' | grep -E %s" % quote(lv)
        # An explicit partition selection disables the usage threshold.
        Disk_usage = 0

    for line in os.popen(cmd).read().split('\n'):
        fields = line.split()
        # df -P rows have six columns; skip blank or truncated rows.
        if len(fields) < 6:
            continue
        try:
            used_percent = int(fields[4].replace('%', ''))
        except ValueError:
            # df prints "-" for file systems without a capacity figure.
            continue
        if used_percent < Disk_usage:
            continue

        rootdisk = fields[0]       # device / file system name
        rootdiskM = fields[2]      # used blocks
        rootdiskdir = fields[5]    # mount point

        # Top-level entries of the mount point; hidden entries are skipped,
        # as a plain `ls` would skip them.
        try:
            names = sorted(os.listdir(rootdiskdir))
        except OSError:
            names = []

        # Keep only entries that df attributes to this same partition, so
        # other file systems mounted below the mount point are not searched.
        same_partition = []
        for name in names:
            if name.startswith('.'):
                continue
            path = '%s/%s' % (rootdiskdir, name)
            probe = "df -P %s | sed '1d' | awk '{print $1}'" % quote(path)
            owner = os.popen(probe).read().split()
            if owner and owner[0] == rootdisk:
                same_partition.append(path)

        if not same_partition:
            continue

        targets = ' '.join(quote(path) for path in same_partition)
        # du prints "<size>\t<path>"; awk splits on the tab so a path with
        # spaces stays in $2. mx and limit are passed with -v so that the
        # running total `num` is never reset between records.
        cmd = (
            "find %s -size +%s -exec du {} \\;%s| sort -rn|"
            "awk -F '\\t' -v mx=%s -v limit=%s "
            "'{if($2!=\".\"){num=$1+num;z=num/mx*100;"
            "if(z <= limit){print $1,$2}else{ print $1,$2;exit;}}}'"
            % (targets, quote(km), lgrep,
               quote(str(rootdiskM)), quote(str(Space_occupied_files)))
        )
        found = os.popen(cmd).read()

        # NOTE: both failure cases end the whole scan; partitions that come
        # later in the df output are not examined.
        if found.replace('\n', '') == '':
            report.append('没有获取到,调整find -size参数重试')
            return report
        if "/" not in found:
            report.append('无法获取到请检查参数是否正确')
            return report

        # Convert the raw listing into the df line and `du -h` sizes.
        GBKrootdisk, listing, shown = GBK(found, rootdisk, The_number)
        total = len([entry for entry in found.split('\n') if entry])
        report.append('显示了 %s 条,共有 %s 条\r\n%s%s'
                      % (shown, total, GBKrootdisk.replace('\r\n', ''), listing))
    return report

#初始化参数
def start():
    """Read the command-line options, apply defaults and build the report.

    Defaults: show 10 files, list files up to 100% of the used space,
    examine partitions that are at least 80% full, and search for files
    larger than 10M. Options returned by ``pygrep(sys.argv[1:])`` override
    these; "all" for the number of files means no limit.

    Returns:
        A one-element list holding the report lines from ``serverdisk``
        joined with "\\r\\n".

    Raises:
        ValueError: if a numeric option is not an integer.
    """
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    The_number = 10              # number of files shown
    Space_occupied_files = 100   # percentage of used space to account for
    Disk_usage = 80              # minimum partition usage percentage
    km = "10M"                   # find -size threshold
    lv = ""                      # explicit partition selector

    (lgrep, sgrep, newDisk_usage, newThe_number,
     newSpace_occupied_files, newkm, newlv) = pygrep(sys.argv[1:])

    if lgrep != "":
        # Custom grep options are only honoured together with a pattern.
        if sgrep == "":
            grep_options = "-Ev"
        else:
            grep_options = ' '.join(quote(word) for word in sgrep.split())
        # Options and pattern are quoted so that command-line text cannot
        # break out of the grep invocation; -e keeps a pattern that starts
        # with "-" from being read as an option.
        lgrep = "|grep %s -e %s" % (grep_options, quote(lgrep))

    if newDisk_usage != "":
        Disk_usage = int(newDisk_usage)
    if newThe_number == "all":
        The_number = None
    elif newThe_number != "":
        The_number = int(newThe_number)
    if newSpace_occupied_files != "":
        Space_occupied_files = int(newSpace_occupied_files)
    if newkm != "":
        km = newkm
    if newlv != "":
        lv = newlv

    lines = serverdisk(Disk_usage, km, Space_occupied_files, The_number, lgrep, lv)
    return ['\r\n'.join(lines)]

# Script entry point: build the report and write it to standard output.
print('\r\n'.join(start()))

一般直接使用命令 df,再用 find 查找出大于指定值的文件;这个脚本是给 python qqbot 使用的。

posted @ 2018-03-16 15:32  IT菜鸟园  阅读(365)  评论(0)  编辑  收藏  举报