Ext3日志原理(续)

1.  与日志相关字段的初始化

<super.c>

/*
 * parse_options (abridged excerpt): journal-related mount option handling.
 *
 * Only the case labels that deal with the journal are shown.  The enclosing
 * switch statement, the declarations of args, option, sbi and data_opt, and
 * the datacheck label targeted by the goto statements are not part of this
 * excerpt.
 *
 * Every failure path visible here returns 0.
 * NOTE(review): presumably 0 tells the caller that parsing failed - confirm
 * against the full function.
 */
static int parse_options (char *options, struct super_block *sb,

              unsigned long *inum, unsigned long *journal_devnum,

              ext3_fsblk_t *n_blocks_count, int is_remount)

{

   

    case Opt_journal_inum:   // inode number of the journal file

        /* The journal location may not be given on a remount. */
        if (is_remount) {

            printk(KERN_ERR "EXT3-fs: cannot specify "

                     "journal on remount\n");

            return 0;

        }

        /* A non-zero result from match_int() is treated as a failure. */
        if (match_int(&args[0], &option))

            return 0;

        *inum = option;

        break;

 

    case Opt_journal_dev: // device number of the journal device

        /* Same remount restriction as for Opt_journal_inum. */
        if (is_remount) {

            printk(KERN_ERR "EXT3-fs: cannot specify "

                  "journal on remount\n");

            return 0;

        }

        if (match_int(&args[0], &option))

            return 0;

        *journal_devnum = option;

        break;     

 

    case Opt_commit: // journal commit interval

        if (match_int(&args[0], &option))

            return 0;

        /* Negative intervals are rejected. */
        if (option < 0)

            return 0;

        /* An interval of 0 selects the JBD default. */
        if (option == 0)

            option = JBD_DEFAULT_MAX_COMMIT_AGE;

        /* The stored interval is the option value scaled by HZ. */
        sbi->s_commit_interval = HZ * option; 

        break;

 

    case Opt_data_journal:  // mount in journal mode

        data_opt = EXT3_MOUNT_JOURNAL_DATA;

        goto datacheck;

 

    case Opt_data_ordered: // mount in ordered mode

        data_opt = EXT3_MOUNT_ORDERED_DATA;

        goto datacheck;

 

    case Opt_data_writeback:   // mount in writeback mode

        data_opt = EXT3_MOUNT_WRITEBACK_DATA;

        /* NOTE(review): the excerpt ends here; the datacheck label is not shown. */
}

 

2.  开始/停止原子操作

<transaction.c>

/*
 * journal_start - obtain a handle for an atomic operation on @journal.
 *
 * @journal: journal to start the operation on; NULL yields ERR_PTR(-EROFS).
 * @nblocks: passed to new_handle() when a fresh handle has to be created.
 *
 * If the current process already has a handle, its reference count is
 * incremented and that same handle is returned.  Otherwise a new handle is
 * created, recorded in current->journal_info and passed to
 * start_this_handle().
 *
 * Returns the handle, or an ERR_PTR() value: -EROFS without a journal,
 * -ENOMEM if new_handle() fails, or the negative error reported by
 * start_this_handle().
 */
handle_t *journal_start(journal_t *journal, int nblocks)
{
    handle_t *cur = journal_current_handle(); /* handle tied to this process */
    int rc;

    if (journal == NULL)
        return ERR_PTR(-EROFS);

    if (cur != NULL) {
        /* Nested start: the existing handle must belong to this journal. */
        J_ASSERT(cur->h_transaction->t_journal == journal);
        cur->h_ref += 1;
        return cur;
    }

    /* No handle yet for this process: create one. */
    cur = new_handle(nblocks);
    if (cur == NULL)
        return ERR_PTR(-ENOMEM);

    /* Associate the handle with the current process. */
    current->journal_info = cur;

    /*
     * start_this_handle: given a handle, deal with any locking or stalling
     * needed to make sure that there is enough journal space for the handle
     * to begin.  Attach the handle to a transaction and set up the
     * transaction's buffer credits.
     */
    rc = start_this_handle(journal, cur);
    if (rc >= 0)
        return cur;

    /* Failure: release the handle and detach it from the process. */
    jbd_free_handle(cur);
    current->journal_info = NULL;
    return ERR_PTR(rc);
}

 

3.  将修改的元数据块(数据块)写到日志

journal_dirty_metadata用于将修改的元数据块加入当前事务,以便随事务写入日志;journal_dirty_data则把修改的数据块标记为必须在当前事务提交之前刷新到磁盘的脏数据。

<transaction.c>

/*

 * journal_dirty_data (abridged excerpt):

 * mark a buffer as containing dirty data which needs to be flushed before we can commit the

 * current transaction.

 *

 * @handle: handle of the running atomic operation.

 * @bh:     buffer_head of the modified data block.

 *

 * Returns 0 on every path shown here, including when the handle is aborted.

 *

 * NOTE(review): the locals journal and need_brelse are never used in this

 * excerpt, and the only visible filing path is the one taken when

 * jh->b_transaction is already set.  The excerpt looks heavily shortened -

 * confirm the details against the full source.

 */

int journal_dirty_data(handle_t *handle, struct buffer_head *bh)

{

    journal_t *journal = handle->h_transaction->t_journal;

    int need_brelse = 0;

    struct journal_head *jh;

 

    /* Nothing is filed for an aborted handle. */
    if (is_handle_aborted(handle))

        return 0;

 

    jh = journal_add_journal_head(bh); // obtain the journal_head for bh

   

    if (jh->b_transaction) {

            __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);

            // file jh on the BJ_SyncData list of the handle's transaction

        }

    /* Release the journal_head obtained above. */
    journal_put_journal_head(jh);

    return 0;

}

 

/*
 * journal_stop (abridged excerpt) - finish the atomic operation of @handle.
 *
 * @handle: handle being completed; it is freed before returning.
 *
 * Starts a commit of the handle's transaction when the handle is marked
 * sync, when the transaction's outstanding credits exceed the journal's
 * per-transaction buffer limit, or when the transaction has expired.  For a
 * sync handle (and unless the task has PF_MEMALLOC set) it also waits for
 * that commit to finish.
 *
 * Returns 0, or the result of log_wait_commit() when the commit was waited
 * for.
 *
 * NOTE: the spin_lock() calls matching the unlocks below are not part of
 * this excerpt.
 */
int journal_stop(handle_t *handle)
{
    transaction_t *transaction = handle->h_transaction;
    journal_t *journal = transaction->t_journal;
    int err = 0;    /* result when no synchronous wait takes place */

    /*
     * If the handle is marked SYNC, we need to set another commit
     * going!  We also want to force a commit if the current
     * transaction is occupying too much of the log, or if the
     * transaction is too old now.
     */
    if (handle->h_sync ||
            transaction->t_outstanding_credits >
                journal->j_max_transaction_buffers ||
            time_after_eq(jiffies, transaction->t_expires)) {
        /*
         * Remember the transaction id while the locks are still held, so
         * the wait below does not have to read the transaction after the
         * locks have been dropped.
         */
        tid_t tid = transaction->t_tid;

        /* Both branches must release t_handle_lock. */
        spin_unlock(&transaction->t_handle_lock);

        /* This is non-blocking */
        __log_start_commit(journal, tid);
        spin_unlock(&journal->j_state_lock);

        /*
         * Special case: JFS_SYNC synchronous updates require us
         * to wait for the commit to complete.
         */
        if (handle->h_sync && !(current->flags & PF_MEMALLOC))
            err = log_wait_commit(journal, tid);
    } else {
        spin_unlock(&transaction->t_handle_lock);
        spin_unlock(&journal->j_state_lock);
    }

    jbd_free_handle(handle);
    return err;
}

 

4.  事务提交

/*

 * journal_commit_transaction

 *

 * The primary function for committing a transaction to the log.  This

 * function is called by the journal thread to begin a complete commit.

 */

journal_commit_transaction函数为事务链表包含的所有数据缓冲区激活I/O数据传送,并等待数据传送终止。随后,该函数为包含在事务中的所有元数据缓冲区(如果以journal模式安装,则也为所有的数据缓冲区)激活I/O数据传送。内核周期性地为日志中每个完成的事务激活检查点活动。检查点主要验证由journal_commit_transaction触发的I/O数据传送是否已经成功结束;如果是,则从日志中删除事务。

 

5.  kjournald守护进程

/*

 * kjournald: The main thread function used to manage a logging device

 * journal.

 *

 * This kernel thread is responsible for two things:

 *

 * 1) COMMIT:  Every so often we need to commit the current state of the

 *    filesystem to disk.  The journal thread is responsible for writing

 *    all of the metadata buffers to disk.

 *

 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all

 *    of the data in that part of the log has been rewritten elsewhere on

 *    the disk.  Flushing these old buffers to reclaim space in the log is

 *    known as checkpointing, and this thread is responsible for that job.

 */

 

ext3整个日志记录的过程大致是这样的:

1. 内核在文件系统挂载的时候,初始化了跟日志相关的一些字段【1】。

2. 当应用程序(用户)发出文件系统调用,如write,内核会创建一个句柄(标识一个原子操作)【2】,并将其加入活动的事务中。内核将需要修改的元数据(数据)缓冲区加入到对应的链表中【3】。

3. 内核kjournald守护进程【5】周期性地提交日志【4】,当系统调用对应的修改都被写入磁盘后(检查点),内核从日志中删除事务。

 

关于jbd更详细的实现细节,请参考:

 journal block device _jbd_源代码分析.rar   

posted @ 2013-04-19 14:09  ydzhang  阅读(719)  评论(0)  编辑  收藏  举报