(linux)块设备驱动程序
1.4.1 Linux块设备驱动程序原理(1)
顾名思义,块设备就是支持以块的方式进行读写的设备,块设备驱动程序则是这类设备的驱动程序。块设备和字符设备最大的区别在于读写数据的基本单元不同。块设备读写数据的基本单元为块,例如磁盘通常为一个sector,而字符设备的基本单元为字节。从实现角度来看,字符设备的实现比较简单,内核例程和用户态API一一对应,这种映射关系由字符设备的file_operations维护。块设备接口则相对复杂,读写API没有直接到块设备层,而是先到文件系统层,然后再由文件系统层发起读写请求。
block_device结构代表了内核中的一个块设备。它可以表示整个磁盘或一个特定的分区。当这个结构代表一个分区时,它的bd_contains成员指向包含这个分区的设备,bd_part成员指向设备的分区结构。当这个结构代表一个块设备时,bd_disk成员指向设备的gendisk结构。
- struct block_device { /* In-kernel representation of one block device: a whole disk or a single partition. */
- dev_t bd_dev;
- struct inode * bd_inode; /* partition inode */
- int bd_openers;
- struct semaphore bd_sem; /* open/close lock */
- struct semaphore bd_mount_sem; /* mount mutex */
- struct list_head bd_inodes;
- void * bd_holder;
- int bd_holders;
- struct block_device * bd_contains; /* for a partition: the device that contains it */
- unsigned bd_block_size;// block size of the partition
- struct hd_struct * bd_part; /* for a partition: the partition structure of the device */
- unsigned bd_part_count;// open count
- int bd_invalidated;
- struct gendisk * bd_disk; /* gendisk structure of the device */
- struct list_head bd_list;
- struct backing_dev_info *bd_inode_backing_dev_info;
- unsigned long bd_private;
- };
gendisk是一个单独的磁盘驱动器的内核表示。内核还使用gendisk来表示分区。
- struct gendisk { /* In-kernel representation of a single disk drive; also used to describe its partitions. */
- int major; // major device number
- int first_minor;
- int minors; // maximum number of minor numbers; 1 if the device cannot be partitioned
- char disk_name[32]; // name of the disk device
- struct hd_struct **part; // partition information, one entry per minor (minors entries)
- struct block_device_operations *fops;// device operations
- struct request_queue *queue; // queue that manages the I/O requests of the device
- void *private_data;
- sector_t capacity;
- int flags;
- char devfs_name[64];
- int number;
- struct device *driverfs_dev;
- struct kobject kobj;
- struct timer_rand_state *random;
- int policy;
- atomic_t sync_io;
- unsigned long stamp, stamp_idle;
- int in_flight;
- #ifdef CONFIG_SMP
- struct disk_stats *dkstats;
- #else
- struct disk_stats dkstats;
- #endif
- };
gendisk结构的操作函数包括以下几个:
- /* Helper functions that operate on struct gendisk. */
- struct gendisk *alloc_disk(int minors); // allocate a disk
- void add_disk(struct gendisk *disk); // add the disk information
- void unlink_gendisk(struct gendisk *disk); // remove the disk information
- void delete_partition(struct gendisk *disk, int part); // delete a partition
- void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags); // add a partition
-
1.4.1 Linux块设备驱动程序原理(2)
block_device_operations结构是块设备对应的操作接口,是连接抽象的块设备操作与具体块设备操作之间的枢纽。
- struct block_device_operations { /* Operation interface of a block device: callbacks a driver fills in for its device. */
- int (*open) (struct inode *, struct file *);
- int (*release) (struct inode *, struct file *);
- int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
- long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
- long (*compat_ioctl) (struct file *, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t, unsigned long *);
- int (*media_changed) (struct gendisk *);
- int (*revalidate_disk) (struct gendisk *);
- int (*getgeo)(struct block_device *, struct hd_geometry *);
- struct module *owner;
- };
- const struct file_operations def_blk_fops = { /* file_operations table; each member is bound to the handler named on its line */
- .open = blkdev_open,
- .release = blkdev_close,
- .llseek = block_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write= generic_file_aio_write_nolock,
- .mmap = generic_file_mmap,
- .fsync = block_fsync,
- .unlocked_ioctl = block_ioctl,
- #ifdef CONFIG_COMPAT
- .compat_ioctl = compat_blkdev_ioctl, /* only present when CONFIG_COMPAT is defined */
- #endif
- .splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
- };
- struct request_queue
- {
- struct list_head queue_head;
- struct request *last_merge;
- elevator_t elevator;
- /* request list of the queue */
- struct request_list rq;
- request_fn_proc *request_fn;
- merge_request_fn *back_merge_fn;
- merge_request_fn *front_merge_fn;
- merge_requests_fn *merge_requests_fn;
- make_request_fn *make_request_fn;
- prep_rq_fn *prep_rq_fn;
- unplug_fn *unplug_fn;
- merge_bvec_fn *merge_bvec_fn;
- activity_fn *activity_fn;
- /* auto-unplug state */
- struct timer_list unplug_timer;
- int unplug_thresh;
- unsigned long unplug_delay; /* auto-unplug delay */
- struct work_struct unplug_work;
- struct backing_dev_info backing_dev_info;
- void *queuedata;
- void *activity_data;
- unsigned long bounce_pfn;
- int bounce_gfp;
- unsigned long queue_flags;// various queue flags
- /* protects the queue structure against re-entry */
- spinlock_t *queue_lock;
- /* core structure of the queue (kobject) */
- struct kobject kobj;
- /* request configuration */
- unsigned long nr_requests; /* maximum number of requests */
- unsigned int nr_congestion_on;
- unsigned int nr_congestion_off;
- unsigned short max_sectors;
- unsigned short max_phys_segments;
- unsigned short max_hw_segments;
- unsigned short hardsect_size;
- unsigned int max_segment_size;
- unsigned long seg_boundary_mask;
- unsigned int dma_alignment;
- struct blk_queue_tag *queue_tags;
- atomic_t refcnt;
- unsigned int in_flight;
- /* sg parameter settings */
- unsigned int sg_timeout;
- unsigned int sg_reserved_size;
- };
- // Create a request queue; a spinlock is supplied when the queue is created.
- request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock);
- // Get the first unfinished request in the queue.
- struct request *elv_next_request(request_queue_t *q);
- void end_request(struct request *req, int uptodate);// complete a request
- void blk_stop_queue(request_queue_t *queue); // stop request processing
- void blk_start_queue(request_queue_t *queue); // start request processing
- void blk_cleanup_queue(request_queue_t *);// clean up the request queue
1.4.2 简单的块设备驱动程序实例
向内核注册和注销一个块设备可使用如下函数:
- int register_blkdev(unsigned int major, const char *name); /* register a block device with the kernel */
- int unregister_blkdev(unsigned int major, const char *name); /* unregister a block device from the kernel */
例1.10 简单的块设备驱动程序实例
代码见光盘\src\1drivermodel\1-10block。核心代码如下所示:
- static struct request_queue *Queue; /* request queue of the device; assigned from blk_init_queue() in simpleblockinit */
- // Custom block device structure; the single instance Device backs the whole driver.
- static struct simpleblockdevice
- {
- unsigned long size; /* size of the data buffer; set to nsectors*hardsect_size at init */
- spinlock_t lock; /* handed to blk_init_queue() together with the request handler */
- u8 *data; /* backing store, allocated with vmalloc(size) at init */
- struct gendisk *gd; /* gendisk obtained from alloc_disk() at init */
- } Device;
- /*
-  * Handle one I/O transfer between the in-memory backing store and a
-  * request buffer.
-  *
-  * dev:    device whose data buffer is accessed
-  * sector: first sector of the transfer, in units of hardsect_size
-  * nsect:  number of sectors to transfer
-  * buffer: request buffer; source when writing, destination when reading
-  * write:  non-zero copies buffer into the device, zero copies out of it
-  *
-  * A transfer that would run past dev->size is logged and dropped.
-  */
- static void simpleblocktransfer(struct simpleblockdevice *dev, unsigned long sector,
-         unsigned long nsect, char *buffer, int write)
- {
-     unsigned long offset = sector*hardsect_size;
-     unsigned long nbytes = nsect*hardsect_size;
-     /* Compare against the remaining space so that offset + nbytes cannot wrap around. */
-     if (offset > dev->size || nbytes > dev->size - offset)
-     {
-         /* %lu matches the unsigned long arguments; report the actual direction. */
-         printk (KERN_NOTICE "sbd: Beyond-end %s (%lu %lu)\n",
-                 write ? "write" : "read", offset, nbytes);
-         return;
-     }
-     if (write)
-         memcpy(dev->data + offset, buffer, nbytes);
-     else
-         memcpy(buffer, dev->data + offset, nbytes);
- }
- /*
-  * Request handler of the queue: takes requests off q one at a time until
-  * elv_next_request() returns NULL. Filesystem requests are passed to
-  * simpleblocktransfer() and ended with status 1; every other request is
-  * logged and ended with status 0.
-  */
- static void simpleblockrequest(struct request_queue *q)
- {
-     struct request *rq;
-     for (;;)
-     {
-         /* fetch the next request; stop once the queue has none left */
-         rq = elv_next_request(q);
-         if (rq == NULL)
-             break;
-         if (blk_fs_request(rq))
-         {
-             simpleblocktransfer(&Device, rq->sector, rq->current_nr_sectors,
-                     rq->buffer, rq_data_dir(rq));
-             end_request(rq, 1);
-         }
-         else
-         {
-             printk (KERN_NOTICE "Skip non-CMD request\n");
-             end_request(rq, 0);
-         }
-     }
- }
- /*
-  * ioctl handler of the simple block device.
-  *
-  * Only HDIO_GETGEO is handled: a made-up geometry (4 heads, 16 sectors per
-  * track, data starting at sector 4) is copied to the user buffer at arg.
-  *
-  * Returns 0 on success, -EFAULT if the copy to user space fails and
-  * -ENOTTY for any other command.
-  */
- int simpleblockioctl (struct inode *inode, struct file *filp,unsigned int cmd, unsigned long arg)
- {
-     long size;
-     struct hd_geometry geo;
-     switch(cmd)
-     {
-     /* report the disk geometry */
-     case HDIO_GETGEO:
-         /* Clear the whole struct first so padding bytes do not leak kernel stack to user space. */
-         memset(&geo, 0, sizeof(geo));
-         /* Device.size is in bytes; the geometry is expressed in KERNEL_SECTOR_SIZE sectors. */
-         size = Device.size/KERNEL_SECTOR_SIZE;
-         /* 4 heads * 16 sectors = 64 sectors per cylinder */
-         geo.cylinders = (size & ~0x3f) >> 6;
-         geo.heads = 4;
-         geo.sectors = 16;
-         geo.start = 4;
-         if (copy_to_user((void *) arg, &geo, sizeof(geo)))
-             return -EFAULT;
-         return 0;
-     default:
-         break;
-     }
-     return -ENOTTY; /* unknown command */
- }
- // Device operations structure of the simple block device
- static struct block_device_operations simpleblockops = {
- .owner = THIS_MODULE,
- .ioctl = simpleblockioctl /* ioctl is the only operation supplied; it is handled by simpleblockioctl */
- };
- /*
-  * Module init: allocates the backing store, creates the request queue,
-  * registers the block major "sbd" and publishes the disk "sbd0".
-  *
-  * Returns 0 on success, otherwise a negative errno; every resource
-  * acquired before the failure is released again.
-  */
- static int __init simpleblockinit(void)
- {
-     int ret = -ENOMEM;
-     Device.size = nsectors*hardsect_size;
-     spin_lock_init(&Device.lock);
-     Device.data = vmalloc(Device.size);
-     if (Device.data == NULL)
-         return -ENOMEM;
-     /* initialize the request queue with simpleblockrequest as its handler */
-     Queue = blk_init_queue(simpleblockrequest, &Device.lock);
-     if (Queue == NULL)
-         goto out;
-     blk_queue_hardsect_size(Queue, hardsect_size);
-     /*
-      * Register the block device. register_blkdev() returns a negative errno
-      * on failure, the allocated major when major_num is 0, and 0 when a
-      * specific major was requested and granted, so only a negative value
-      * is an error.
-      */
-     ret = register_blkdev(major_num, "sbd");
-     if (ret < 0) {
-         printk(KERN_WARNING "sbd: unable to get major number\n");
-         goto out_queue;
-     }
-     if (major_num == 0)
-         major_num = ret;
-     ret = -ENOMEM;
-     Device.gd = alloc_disk(16);
-     if (! Device.gd)
-         goto out_unregister;
-     Device.gd->major = major_num;
-     Device.gd->first_minor = 0;
-     Device.gd->fops = &simpleblockops;
-     Device.gd->private_data = &Device;
-     strcpy (Device.gd->disk_name, "sbd0");
-     /* set the capacity, expressed in KERNEL_SECTOR_SIZE units */
-     set_capacity(Device.gd, nsectors*(hardsect_size/KERNEL_SECTOR_SIZE));
-     Device.gd->queue = Queue;
-     add_disk(Device.gd);
-     return 0;
- out_unregister:
-     unregister_blkdev(major_num, "sbd");
- out_queue:
-     /* the queue was created above and must not be leaked on failure */
-     blk_cleanup_queue(Queue);
- out:
-     vfree(Device.data);
-     return ret;
- }
- static void __exit simpleblockexit(void) /* Module exit: releases the gendisk, block major, request queue and data buffer set up by simpleblockinit. */
- {
- del_gendisk(Device.gd); /* NOTE(review): presumably the counterpart of add_disk() in simpleblockinit; confirm */
- put_disk(Device.gd);
- unregister_blkdev(major_num, "sbd"); /* same major and name that were passed to register_blkdev() */
- blk_cleanup_queue(Queue); /* queue created by blk_init_queue() */
- vfree(Device.data); /* buffer allocated with vmalloc() */
- }
- module_init(simpleblockinit);
- module_exit(simpleblockexit);
- [root@/home]#cat /proc/filesystems
- nodev sysfs
- nodev rootfs
- nodev bdev
- nodev proc
- nodev binfmt_misc
- nodev debugfs
- nodev securityfs
- nodev sockfs
- nodev usbfs
- nodev pipefs
- nodev anon_inodefs
- nodev futexfs
- nodev tmpfs
- nodev inotifyfs
- ext3
- cramfs
- nodev ramfs
- msdos
- vfat
- iso9660
- nodev nfs
- nodev nfs4
- nodev mqueue
- nodev rpc_pipefs
- [root@/home]#insmod demo.ko
- sbd0: unknown partition table
- [root@/home]#mknod /dev/sbd b 253 0
- [root@/home]#./mkfs.ext3 /dev/sbd
- mke2fs 1.40.9 (27-Apr-2008)
- Filesystem label=
- OS type: Linux
- Block size=1024 (log=0)
- Fragment size=1024 (log=0)
- 1280 inodes, 5120 blocks
- 256 blocks (5.00%) reserved for the super user
- First data block=1
- Maximum filesystem blocks=5242880
- 1 block group
- 8192 blocks per group, 8192 fragments per group
- 1280 inodes per group
- Writing inode tables: done
- Creating journal (1024 blocks): done
- Writing superblocks and filesystem accounting information: done
- This filesystem will be automatically checked every 39 mounts or
- 180 days, whichever comes first. Use tune2fs -c or -i to override.
- [root@/home]#mount -t ext3 /dev/sbd /mnt/u
- kjournald starting. Commit interval 5 seconds
- EXT3 FS on sbd0, internal journal
- EXT3-fs: mounted filesystem with ordered data mode.
- [root@/home]#df
- Filesystem 1k-blocks Used Available Use% Mounted on
- rootfs 2063504 1191136 767548 61% /
- /dev/root 2063504 1191136 767548 61% /
- /dev/sbd 4955 1063 3636 23% /mnt/u
- [root@/home]#cd /mnt/u
- [root@/mnt/u]#ls
- lost+found
【最是那一低头的温柔】
【好像水莲花不胜凉风的娇羞】