Linux3.10.0块IO子系统流程(4)-- 为请求构造SCSI命令
首先来看scsi_prep_fn
1 int scsi_prep_fn(struct request_queue *q, struct request *req) 2 { 3 struct scsi_device *sdev = q->queuedata; 4 int ret = BLKPREP_KILL; 5 6 if (req->cmd_type == REQ_TYPE_BLOCK_PC) 7 ret = scsi_setup_blk_pc_cmnd(sdev, req); 8 return scsi_prep_return(q, req, ret); 9 }
scsi_prep_fn只能处理来自SCSI公共层的命令(REQ_TYPE_BLOCK_PC),其他类型的请求保持默认的返回值BLKPREP_KILL。在scsi_setup_blk_pc_cmnd函数返回后,根据返回值调用scsi_prep_return进行相应的处理
1 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) 2 { 3 struct scsi_cmnd *cmd; 4 int ret = scsi_prep_state_check(sdev, req); // 根据请求的标志位以及SCSI设备的状态进行初步检查 5 6 if (ret != BLKPREP_OK) 7 return ret; 8 9 /* 10 * 分配一个新的scsi_cmnd描述符,将它记录在special域;如果这里已经指向了一个现有的scsi_cmnd描述符,直接使用它 11 */ 12 cmd = scsi_get_cmd_from_req(sdev, req); 13 if (unlikely(!cmd)) 14 return BLKPREP_DEFER; 15 16 /* 17 * BLOCK_PC requests may transfer data, in which case they must a bio attached to them. Or they might contain a SCSI command 18 * that does not transfer data, in which case they may optionally submit a request without an attached bio. 19 * 尽管请求来自SCSI公共服务层,但是这些请求也可以涉及数据传输,在bio中保存的数据最终需要复制到SCSI命令描述符的数据缓冲区中 20 * 具体的工作由scsi_init_io完成,后续分析 21 * 如果不涉及数据传输,就将SCSI命令缓冲区清零 22 */ 23 if (req->bio) { 24 int ret; 25 26 BUG_ON(!req->nr_phys_segments); 27 28 ret = scsi_init_io(cmd, GFP_ATOMIC); 29 if (unlikely(ret)) 30 return ret; 31 } else { 32 BUG_ON(blk_rq_bytes(req)); 33 34 memset(&cmd->sdb, 0, sizeof(cmd->sdb)); 35 req->buffer = NULL; 36 } 37 38 cmd->cmd_len = req->cmd_len; 39 if (!blk_rq_bytes(req)) 40 cmd->sc_data_direction = DMA_NONE; 41 else if (rq_data_dir(req) == WRITE) 42 cmd->sc_data_direction = DMA_TO_DEVICE; 43 else 44 cmd->sc_data_direction = DMA_FROM_DEVICE; 45 46 cmd->transfersize = blk_rq_bytes(req); 47 cmd->allowed = req->retries; 48 return BLKPREP_OK; 49 }
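sd_prep_fn函数根据request结构中的信息构造SCSI(读或写)命令,并将结果保存在request的special域。sd_prep_fn只能处理来自上层的请求(REQ_TYPE_FS),以及来自SCSI层的请求(REQ_TYPE_BLOCK_PC),其他类型的请求直接以BLKPREP_KILL结束。有一种请求比较特殊,即所谓的DISCARD请求:这个请求来自上层,但需要被转换成SCSI请求来处理。此外,带有REQ_WRITE_SAME或REQ_FLUSH标志的请求也分别交给专门的函数(sd_setup_write_same_cmnd、scsi_setup_flush_cmnd)处理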
1 /** 2 * sd_prep_fn - build a scsi (read or write) command from 3 * information in the request structure. 4 * @SCpnt: pointer to mid-level's per scsi command structure that 5 * contains request and into which the scsi command is written 6 * 7 * Returns 1 if successful and 0 if error (or cannot be done now). 8 **/ 9 static int sd_prep_fn(struct request_queue *q, struct request *rq) 10 { 11 struct scsi_cmnd *SCpnt; 12 struct scsi_device *sdp = q->queuedata; 13 struct gendisk *disk = rq->rq_disk; 14 struct scsi_disk *sdkp; 15 sector_t block = blk_rq_pos(rq); 16 sector_t threshold; 17 unsigned int this_count = blk_rq_sectors(rq); 18 int ret, host_dif; 19 unsigned char protect; 20 21 /* 22 * Discard request come in as REQ_TYPE_FS but we turn them into 23 * block PC requests to make life easier. 24 */ 25 if (rq->cmd_flags & REQ_DISCARD) { 26 ret = sd_setup_discard_cmnd(sdp, rq); 27 goto out; 28 } else if (rq->cmd_flags & REQ_WRITE_SAME) { 29 ret = sd_setup_write_same_cmnd(sdp, rq); 30 goto out; 31 } else if (rq->cmd_flags & REQ_FLUSH) { 32 ret = scsi_setup_flush_cmnd(sdp, rq); 33 goto out; 34 } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 35 ret = scsi_setup_blk_pc_cmnd(sdp, rq); 36 goto out; 37 } else if (rq->cmd_type != REQ_TYPE_FS) { 38 ret = BLKPREP_KILL; 39 goto out; 40 } 41 ret = scsi_setup_fs_cmnd(sdp, rq); 42 if (ret != BLKPREP_OK) 43 goto out; 44 SCpnt = rq->special; 45 sdkp = scsi_disk(disk); 46 47 /* from here on until we're complete, any goto out 48 * is used for a killable error condition */ 49 ret = BLKPREP_KILL; 50 51 SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, 52 "sd_prep_fn: block=%llu, " 53 "count=%d\n", 54 (unsigned long long)block, 55 this_count)); 56 57 /* 58 * 以下几种情况直接结束命令: 59 * 1.SCSI不在线 60 * 2.请求数据超出了设备容量 61 * 3.磁盘介质发生了变化 62 */ 63 if (!sdp || !scsi_device_online(sdp) || 64 block + blk_rq_sectors(rq) > get_capacity(disk)) { 65 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 66 "Finishing %u sectors\n", 67 blk_rq_sectors(rq))); 68 
SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 69 "Retry with 0x%p\n", SCpnt)); 70 goto out; 71 } 72 73 if (sdp->changed) { 74 /* 75 * quietly refuse to do anything to a changed disc until 76 * the changed bit has been reset 77 */ 78 /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */ 79 goto out; 80 } 81 82 /* 83 * Some SD card readers can't handle multi-sector accesses which touch the last one or two hardware sectors. Split accesses as needed. 84 * 某些设备(如SD卡)不能多扇区访问最后的部分扇区,需分割访问 85 */ 86 threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS * 87 (sdp->sector_size / 512); 88 89 if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) { 90 if (block < threshold) { 91 /* Access up to the threshold but not beyond */ 92 this_count = threshold - block; 93 } else { 94 /* Access only a single hardware sector */ 95 this_count = sdp->sector_size / 512; 96 } 97 } 98 99 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n", 100 (unsigned long long)block)); 101 102 /* 103 * If we have a 1K hardware sectorsize, prevent access to single 512 byte sectors. 104 * In theory we could handle this - in fact the scsi cdrom driver must be able to handle this because 105 * we typically use 1K blocksizes, and cdroms typically have 2K hardware sectorsizes. 106 * Of course, things are simpler with the cdrom, since it is read-only. For performance reasons, 107 * the filesystems should be able to handle this and not force the scsi disk driver to use bounce buffers for this. 
108 * 磁盘的硬件扇区长度可能不是512,而是1024/2048或4096 109 */ 110 if (sdp->sector_size == 1024) { 111 if ((block & 1) || (blk_rq_sectors(rq) & 1)) { 112 scmd_printk(KERN_ERR, SCpnt, 113 "Bad block number requested\n"); 114 goto out; 115 } else { 116 block = block >> 1; 117 this_count = this_count >> 1; 118 } 119 } 120 if (sdp->sector_size == 2048) { 121 if ((block & 3) || (blk_rq_sectors(rq) & 3)) { 122 scmd_printk(KERN_ERR, SCpnt, 123 "Bad block number requested\n"); 124 goto out; 125 } else { 126 block = block >> 2; 127 this_count = this_count >> 2; 128 } 129 } 130 if (sdp->sector_size == 4096) { 131 if ((block & 7) || (blk_rq_sectors(rq) & 7)) { 132 scmd_printk(KERN_ERR, SCpnt, 133 "Bad block number requested\n"); 134 goto out; 135 } else { 136 block = block >> 3; 137 this_count = this_count >> 3; 138 } 139 } 140 if (rq_data_dir(rq) == WRITE) { 141 if (!sdp->writeable) { 142 goto out; 143 } 144 SCpnt->cmnd[0] = WRITE_6; 145 SCpnt->sc_data_direction = DMA_TO_DEVICE; 146 147 if (blk_integrity_rq(rq)) 148 sd_dif_prepare(rq, block, sdp->sector_size); 149 150 } else if (rq_data_dir(rq) == READ) { 151 SCpnt->cmnd[0] = READ_6; 152 SCpnt->sc_data_direction = DMA_FROM_DEVICE; 153 } else { 154 scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags); 155 goto out; 156 } 157 158 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 159 "%s %d/%u 512 byte blocks.\n", 160 (rq_data_dir(rq) == WRITE) ? 
161 "writing" : "reading", this_count, 162 blk_rq_sectors(rq))); 163 164 /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ 165 host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); 166 if (host_dif) 167 protect = 1 << 5; 168 else 169 protect = 0; 170 171 if (host_dif == SD_DIF_TYPE2_PROTECTION) { 172 SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); 173 174 if (unlikely(SCpnt->cmnd == NULL)) { 175 ret = BLKPREP_DEFER; 176 goto out; 177 } 178 179 SCpnt->cmd_len = SD_EXT_CDB_SIZE; 180 memset(SCpnt->cmnd, 0, SCpnt->cmd_len); 181 SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD; 182 SCpnt->cmnd[7] = 0x18; 183 SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32; 184 SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 185 186 /* LBA */ 187 SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; 188 SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; 189 SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; 190 SCpnt->cmnd[15] = sizeof(block) > 4 ? 
(unsigned char) (block >> 32) & 0xff : 0; 191 SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff; 192 SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff; 193 SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff; 194 SCpnt->cmnd[19] = (unsigned char) block & 0xff; 195 196 /* Expected Indirect LBA */ 197 SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff; 198 SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff; 199 SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff; 200 SCpnt->cmnd[23] = (unsigned char) block & 0xff; 201 202 /* Transfer length */ 203 SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff; 204 SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff; 205 SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff; 206 SCpnt->cmnd[31] = (unsigned char) this_count & 0xff; 207 } else if (sdp->use_16_for_rw) { 208 SCpnt->cmnd[0] += READ_16 - READ_6; 209 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 210 SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; 211 SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; 212 SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; 213 SCpnt->cmnd[5] = sizeof(block) > 4 ? 
(unsigned char) (block >> 32) & 0xff : 0; 214 SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff; 215 SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff; 216 SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff; 217 SCpnt->cmnd[9] = (unsigned char) block & 0xff; 218 SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff; 219 SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff; 220 SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff; 221 SCpnt->cmnd[13] = (unsigned char) this_count & 0xff; 222 SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0; 223 } else if ((this_count > 0xff) || (block > 0x1fffff) || 224 scsi_device_protection(SCpnt->device) || 225 SCpnt->device->use_10_for_rw) { 226 if (this_count > 0xffff) 227 this_count = 0xffff; 228 229 SCpnt->cmnd[0] += READ_10 - READ_6; 230 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 231 SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; 232 SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; 233 SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; 234 SCpnt->cmnd[5] = (unsigned char) block & 0xff; 235 SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; 236 SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; 237 SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; 238 } else { 239 if (unlikely(rq->cmd_flags & REQ_FUA)) { 240 /* 241 * This happens only if this drive failed 242 * 10byte rw command with ILLEGAL_REQUEST 243 * during operation and thus turned off 244 * use_10_for_rw. 
245 */ 246 scmd_printk(KERN_ERR, SCpnt, 247 "FUA write on READ/WRITE(6) drive\n"); 248 goto out; 249 } 250 251 SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); 252 SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); 253 SCpnt->cmnd[3] = (unsigned char) block & 0xff; 254 SCpnt->cmnd[4] = (unsigned char) this_count; 255 SCpnt->cmnd[5] = 0; 256 } 257 SCpnt->sdb.length = this_count * sdp->sector_size; 258 259 /* If DIF or DIX is enabled, tell HBA how to handle request */ 260 if (host_dif || scsi_prot_sg_count(SCpnt)) 261 sd_prot_op(SCpnt, host_dif); 262 263 /* 264 * We shouldn't disconnect in the middle of a sector, so with a dumb 265 * host adapter, it's safe to assume that we can at least transfer 266 * this many bytes between each connect / disconnect. 267 */ 268 SCpnt->transfersize = sdp->sector_size; 269 SCpnt->underflow = this_count << 9; 270 SCpnt->allowed = SD_MAX_RETRIES; 271 272 /* 273 * This indicates that the command is ready from our end to be 274 * queued. 275 */ 276 ret = BLKPREP_OK; 277 out: 278 return scsi_prep_return(q, rq, ret); 279 }
scsi_setup_fs_cmnd函数前面和之前分析过的差不多,来自文件系统的请求是需要传输数据的,也就是要为bio描述符中的数据分配SCSI数据缓冲区,就是scsi_init_io
1 /* 2 * Setup a REQ_TYPE_FS command. These are simple read/write request 3 * from filesystems that still need to be translated to SCSI CDBs from 4 * the ULD. 5 */ 6 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) 7 { 8 struct scsi_cmnd *cmd; 9 int ret = scsi_prep_state_check(sdev, req); 10 11 if (ret != BLKPREP_OK) 12 return ret; 13 14 if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh 15 && sdev->scsi_dh_data->scsi_dh->prep_fn)) { 16 ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req); 17 if (ret != BLKPREP_OK) 18 return ret; 19 } 20 21 /* 22 * Filesystem requests must transfer data. 23 */ 24 BUG_ON(!req->nr_phys_segments); 25 26 cmd = scsi_get_cmd_from_req(sdev, req); 27 if (unlikely(!cmd)) 28 return BLKPREP_DEFER; 29 30 /* 31 * 来自上层的请求信息都在bio里,和SCSI公共层请求不一样,我们需要重新为它构造SCSI规范定义的SCSI命令 32 * 构造好的内容会保存在scsi_cmnd描述符的cmnd域,所以首先将这个命令缓冲区清零 33 */ 34 memset(cmd->cmnd, 0, BLK_MAX_CDB); 35 return scsi_init_io(cmd, GFP_ATOMIC); 36 }
-------------------------------------------------- 少年应是春风和煦,肩头挑着草长莺飞 --------------------------------------------------