Ext3日志原理(续)
1. 与日志相关字段的初始化
<super.c>
/*
 * parse_options (excerpt): the journal-related cases of the ext3 mount-option
 * parser. Code is elided at the "…" marker, and neither the enclosing switch
 * statement nor the datacheck label targeted below is visible here.
 *
 * Every failure path visible in this excerpt returns 0.
 */
static int parse_options (char *options, struct super_block *sb,
unsigned long *inum, unsigned long *journal_devnum,
ext3_fsblk_t *n_blocks_count, int is_remount)
{
…
case Opt_journal_inum: // inode number of the journal file
/* The journal cannot be re-specified on a remount. */
if (is_remount) {
printk(KERN_ERR "EXT3-fs: cannot specify "
"journal on remount\n");
return 0;
}
if (match_int(&args[0], &option))
return 0;
*inum = option;
break;
case Opt_journal_dev: // journal device
/* Same restriction as above: not allowed on a remount. */
if (is_remount) {
printk(KERN_ERR "EXT3-fs: cannot specify "
"journal on remount\n");
return 0;
}
if (match_int(&args[0], &option))
return 0;
*journal_devnum = option;
break;
case Opt_commit: // journal commit interval
if (match_int(&args[0], &option))
return 0;
/* A negative interval is rejected. */
if (option < 0)
return 0;
/* Zero selects the JBD default maximum commit age. */
if (option == 0)
option = JBD_DEFAULT_MAX_COMMIT_AGE;
/* Stored as HZ * option; presumably option is in seconds -- TODO confirm. */
sbi->s_commit_interval = HZ * option;
break;
case Opt_data_journal: // mount in journal mode
data_opt = EXT3_MOUNT_JOURNAL_DATA;
goto datacheck;
case Opt_data_ordered: // mount in ordered mode
data_opt = EXT3_MOUNT_ORDERED_DATA;
goto datacheck;
case Opt_data_writeback: // mount in writeback mode
data_opt = EXT3_MOUNT_WRITEBACK_DATA;
}
2. 开始/停止原子操作
<transaction.c>
/*
 * journal_start: obtain a handle for a new atomic operation on @journal.
 *
 * If the calling task already has a handle, its reference count is bumped
 * and that same handle is returned. Otherwise a new handle is created for
 * @nblocks, attached to the task and started.
 *
 * Returns the handle, ERR_PTR(-EROFS) when @journal is NULL,
 * ERR_PTR(-ENOMEM) when no handle could be created, or ERR_PTR(err) when
 * start_this_handle() fails with a negative err.
 */
handle_t *journal_start(journal_t *journal, int nblocks)
{
	/* Handle already associated with the calling task, if any. */
	handle_t *cur = journal_current_handle();
	int rc;

	if (journal == NULL)
		return ERR_PTR(-EROFS);

	if (cur != NULL) {
		/* Reuse the task's handle; it must belong to this journal. */
		J_ASSERT(cur->h_transaction->t_journal == journal);
		cur->h_ref += 1;
		return cur;
	}

	/* The task has no handle yet, so create one. */
	cur = new_handle(nblocks);
	if (cur == NULL)
		return ERR_PTR(-ENOMEM);

	/* Associate the new handle with the calling task. */
	current->journal_info = cur;

	/*
	 * start_this_handle() does whatever locking or stalling is required
	 * for the journal to have enough space for this handle, attaches the
	 * handle to a transaction and sets up that transaction's buffer
	 * credits.
	 */
	rc = start_this_handle(journal, cur);
	if (rc >= 0)
		return cur;

	/* Starting failed: release the handle and detach it from the task. */
	jbd_free_handle(cur);
	current->journal_info = NULL;
	return ERR_PTR(rc);
}
3. 将修改的元数据块(数据块)写到日志
journal_dirty_metadata用于将修改的元数据块记入日志;journal_dirty_data则用于把修改的数据块标记为脏数据,这些数据必须在当前事务提交之前刷新到磁盘。
<transaction.c>
/*
 * mark a buffer as containing dirty data which needs to be flushed before we can commit the
 * current transaction.
 *
 * Returns 0 on every path visible in this excerpt, including the early
 * return taken when the handle is aborted. Part of the body is elided at
 * the "…" marker; journal and need_brelse are only used in that elided part.
 */
int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
journal_t *journal = handle->h_transaction->t_journal;
int need_brelse = 0;
struct journal_head *jh;
/* Nothing is filed for an aborted handle. */
if (is_handle_aborted(handle))
return 0;
jh = journal_add_journal_head(bh); // build a journal_head structure from bh
…
if (jh->b_transaction) {
__journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
// add jh to the list associated with the atomic operation (filed as BJ_SyncData)
}
journal_put_journal_head(jh);
return 0;
}
/*
 * journal_stop: end the atomic operation represented by @handle.
 *
 * A commit of the handle's transaction is requested when the handle is
 * marked sync, when the transaction's outstanding credits exceed the
 * journal's j_max_transaction_buffers, or when the transaction has reached
 * its t_expires time. For a sync handle, unless the task has PF_MEMALLOC
 * set, the call also waits for that commit to complete.
 *
 * Returns 0, or the result of log_wait_commit() when the commit was waited
 * for. The handle is freed before returning.
 *
 * NOTE(review): this is an abridged excerpt. Acquiring j_state_lock and
 * t_handle_lock is not shown; both are assumed to be held on entry to the
 * if/else below, because its else branch releases both of them.
 */
int journal_stop(handle_t *handle)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	/* err must be defined on the paths that do not wait for a commit. */
	int old_handle_count, err = 0;
	pid_t pid;

	pid = current->pid;

	/*
	 * If the handle is marked SYNC, we need to set another commit
	 * going! We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	if (handle->h_sync ||
	    transaction->t_outstanding_credits >
			journal->j_max_transaction_buffers ||
	    time_after_eq(jiffies, transaction->t_expires)) {
		/*
		 * Remember the transaction ID while the locks are still
		 * held, so the wait below does not have to read it from
		 * the transaction after the locks have been dropped.
		 */
		tid_t tid = transaction->t_tid;

		/* Release t_handle_lock here too, as the else branch does. */
		spin_unlock(&transaction->t_handle_lock);
		/* This is non-blocking */
		__log_start_commit(journal, transaction->t_tid);
		spin_unlock(&journal->j_state_lock);

		/*
		 * Special case: JFS_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
			err = log_wait_commit(journal, tid);
	} else {
		spin_unlock(&transaction->t_handle_lock);
		spin_unlock(&journal->j_state_lock);
	}

	jbd_free_handle(handle);
	return err;
}
4. 事务提交
/*
* journal_commit_transaction
*
* The primary function for committing a transaction to the log. This
* function is called by the journal thread to begin a complete commit.
*/
journal_commit_transaction函数为事务链表包含的所有数据缓冲区激活I/O数据传送,并等待数据传送终止。该函数还为包含在事务中的所有元数据缓冲区(如果以journal模式挂载,则也包括所有的数据缓冲区)激活I/O数据传送。内核周期性地为日志中每个已完成的事务激活检查点活动。检查点主要验证由journal_commit_transaction触发的I/O数据传送是否已经成功结束;如果是,则从日志中删除该事务。
5. kjournald守护进程
/*
* kjournald: The main thread function used to manage a logging device
* journal.
*
* This kernel thread is responsible for two things:
*
* 1) COMMIT: Every so often we need to commit the current state of the
* filesystem to disk. The journal thread is responsible for writing
* all of the metadata buffers to disk.
*
* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
* of the data in that part of the log has been rewritten elsewhere on
* the disk. Flushing these old buffers to reclaim space in the log is
* known as checkpointing, and this thread is responsible for that job.
*/
ext3整个日志记录的过程大致是这样的:
1. 内核在文件系统挂载的时候,初始化了跟日志相关的一些字段【1】。
2. 当应用程序(用户)发出文件系统调用(如write)时,内核会创建一个句柄(标识一个原子操作)【2】,并将其加入活动的事务中。内核将需要修改的元数据(数据)缓冲区加入到对应的链表中【3】。
3. 内核kjournald守护进程【5】周期性地提交日志【4】;当系统调用对应的修改都被写入磁盘后(检查点),内核从日志中删除该事务。
关于jbd更详细的实现细节,请参考:
journal block device _jbd_源代码分析.rar