Loading [MathJax]/jax/output/HTML-CSS/config.js

Good,True and Beautiful

Good Good Study, Day Day Up

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

统计

Python Quick list dir

昨天 Python 发布了 3.5,添加了 os.scandir。根据文档(Docs),该 API 比 os.listdir 更快:

which speeds it up by 3-5 times on POSIX systems and by 7-20 times on Windows systems

以前因为目录太大(文件数过万),listdir又太慢,写了一个自己的listdir,发布一下 (仅支持Linux)

#!/usr/bin/python

import os
import ctypes
from ctypes.util import find_library

# Handle to the C runtime that provides opendir/readdir/closedir.
# find_library() takes the case-sensitive name without the "lib" prefix, so
# the C library is 'c'; with 'C' the lookup presumably yields None and the
# symbols were only reachable because CDLL(None) opens the running process.
# use_errno=True lets callers read the failure reason via ctypes.get_errno().
clib = ctypes.CDLL(find_library('c'), use_errno=True)

class c_dir(ctypes.Structure):
	"""Opaque stand-in for the C ``DIR`` stream; declares no fields and is only used through pointers."""
	
class c_dirent(ctypes.Structure):
	"""ctypes view of the ``struct dirent`` record returned by readdir(3).

	Field order and types follow the Linux/glibc declaration (see the
	readdir(3) man page); other platforms lay the record out differently.
	"""
	_fields_ = (
		# ino_t is unsigned; a signed type reports large inode numbers as negative.
		('d_ino', ctypes.c_ulong),
		('d_off', ctypes.c_long),  # offset
		('d_reclen', ctypes.c_ushort),  # record length
		('d_type', ctypes.c_ubyte),  # unsigned char in the C declaration
		# Deliberately larger than the C array: records are only read through
		# pointers returned by libc, and ctypes stops at the first NUL byte
		# when a char-array field is read, so the extra room is never touched.
		('d_name', ctypes.c_char * 4096),
	)
	
# Pointer types exchanged with libc.
c_dir_p = ctypes.POINTER(c_dir)
c_dirent_p = ctypes.POINTER(c_dirent)

# opendir: takes a char* path, returns a directory stream pointer.
opendir = clib.opendir
opendir.argtypes = [ctypes.c_char_p]
opendir.restype = c_dir_p

# readdir: takes a directory stream pointer, returns a pointer to a dirent record.
readdir = clib.readdir
readdir.argtypes = [c_dir_p]
readdir.restype = c_dirent_p

# closedir: takes a directory stream pointer, returns a C int.
closedir = clib.closedir
closedir.argtypes = [c_dir_p]
closedir.restype = ctypes.c_int

def countdir(path):
	"""Walk a directory with readdir and return summary counters.

	Args:
		path: directory to scan, as str, bytes or os.PathLike.

	Returns:
		A dict with four keys:
		"count": number of entries, not counting "." and "..".
		"total_filename": summed length of every d_name read, including
			"." and ".." when the filesystem reports them.
		"total_metasize": summed d_reclen of every record read.
		"dirsize": os.path.getsize(path).

	Raises:
		ValueError: path is not a directory.
		OSError: opendir returned NULL (for example, permission denied).
	"""
	if not os.path.isdir(path):
		# %-formatting instead of '+' so bytes and PathLike arguments do not
		# turn this into a TypeError; the text is unchanged for str paths.
		raise ValueError('arg error, not a dir: %s' % (path,))
	# opendir is declared with c_char_p, which rejects str on Python 3.
	raw_path = path if isinstance(path, bytes) else os.fsencode(path)
	stream = opendir(raw_path)
	if not stream:
		# A NULL stream must never reach readdir/closedir: libc would
		# dereference it and crash the interpreter.
		err = ctypes.get_errno()  # non-zero only if libc was loaded with use_errno=True
		if err:
			raise OSError(err, os.strerror(err), path)
		raise OSError('opendir failed: %s' % (path,))
	total_num = total_filename = total_metasize = 0
	try:
		while True:
			entry = readdir(stream)
			if not entry:  # NULL pointer: no more records
				break
			record = entry.contents
			name = record.d_name
			total_filename += len(name)
			total_metasize += record.d_reclen
			# Skip the two special entries by name rather than subtracting 2
			# afterwards, so the count stays right if they are not reported.
			if name not in (b'.', b'..'):
				total_num += 1
	finally:
		closedir(stream)
	return {
		"count": total_num,
		"total_filename": total_filename,
		"total_metasize": total_metasize,
		"dirsize": os.path.getsize(path),
	}

def listdir(path):
	"""Lazily yield one dict per directory record read with readdir.

	The special entries "." and ".." are included when the filesystem
	reports them. Because this is a generator, nothing is validated or
	opened until the first item is requested.

	Args:
		path: directory to scan, as str, bytes or os.PathLike.

	Yields:
		dict with keys "name" (the raw d_name value as ctypes returns it,
		i.e. bytes on Python 3), "inode" (d_ino) and "metasize" (d_reclen).

	Raises:
		ValueError: path is not a directory.
		OSError: opendir returned NULL (for example, permission denied).
	"""
	if not os.path.isdir(path):
		# %-formatting instead of '+' so bytes and PathLike arguments do not
		# turn this into a TypeError; the text is unchanged for str paths.
		raise ValueError('arg error, not a dir: %s' % (path,))
	# opendir is declared with c_char_p, which rejects str on Python 3.
	raw_path = path if isinstance(path, bytes) else os.fsencode(path)
	stream = opendir(raw_path)
	if not stream:
		# A NULL stream must never reach readdir/closedir: libc would
		# dereference it and crash the interpreter.
		err = ctypes.get_errno()  # non-zero only if libc was loaded with use_errno=True
		if err:
			raise OSError(err, os.strerror(err), path)
		raise OSError('opendir failed: %s' % (path,))
	try:
		while True:
			entry = readdir(stream)
			if not entry:  # NULL pointer: no more records
				break
			# Copy the fields out right away: the record memory belongs to
			# libc and may be overwritten by the next readdir call.
			record = entry.contents
			yield {
				"name": record.d_name,
				"inode": record.d_ino,
				"metasize": record.d_reclen,
			}
	finally:
		# Also runs when the consumer abandons the generator early.
		closedir(stream)
		
		
if __name__ == '__main__':
	# Command-line entry point: print the countdir() summary for one directory.
	import sys
	if len(sys.argv) < 2:
		# Exit with a usage line instead of an IndexError traceback.
		sys.exit('usage: %s DIRECTORY' % sys.argv[0])
	print(countdir(sys.argv[1]))

posted on   Simple Love  阅读(521)  评论(0)  编辑  收藏  举报

编辑推荐:
· 一次Java后端服务间歇性响应慢的问题排查记录
· dotnet 源代码生成器分析器入门
· ASP.NET Core 模型验证消息的本地化新姿势
· 对象命名为何需要避免'-er'和'-or'后缀
· SQL Server如何跟踪自动统计信息更新?
阅读排行:
· “你见过凌晨四点的洛杉矶吗?”--《我们为什么要睡觉》
· C# 从零开始使用Layui.Wpf库开发WPF客户端
· 编程神器Trae:当我用上后,才知道自己的创造力被低估了多少
· C#/.NET/.NET Core技术前沿周刊 | 第 31 期(2025年3.17-3.23)
· 接口重试的7种常用方案!
点击右上角即可分享
微信分享提示