MySQL Innodb Engine--MVCC代码瞎猜

MVCC实现

在InnoDB存储引擎中,每个表都是索引组织表,如果建表时没有指定主键索引,会自动创建一个6字节的自增列来作为隐藏主键。

每条聚簇索引的索引记录都包含下面两个隐藏列:

  • 事务ID列,用于标识该版本记录由哪个事务修改产生。
  • 回滚指针列,是指向存放“上一个版本记录数据”的Undo Log的指针。

PS:对于存放在Undo Log上的“版本记录”,除包含变化前的用户数据外,也包含事务ID列和回滚指针列

InnoDB存储引擎基于事务ID列和回滚指针列来实现多版本并发控制(multi-version concurrency control),为判断事务应读取记录的哪个版本,会为事务分配一个read view数据结构,存放如下信息:

  • m_low_limit_id:当前事务能看到的"最大事务ID"的上限(高水位),事务ID大于或等于m_low_limit_id的事务产生的修改对当前事务不可见。
  • m_up_limit_id:当前事务能看到的“最小事务ID”(低水位),事务ID小于m_up_limit_id的事务产生的修改对当前事务可见。
  • m_creator_trx_id:当前事务的事务ID,当前事务产生的修改对当前事务可见。
  • m_ids:创建read view时的活跃事务ID链表,该活跃事务链表中的事务产生的修改对当前事务不可见(创建read view时这些事务尚未提交)。

InnoDB存储引擎的事务系统使用数据结构trx_sys_t来存放一些全局事务信息,

 /** The smallest number not yet
 assigned as a transaction id or
 transaction number. This is declared
 volatile because it can be accessed
 without holding any mutex during
 AC-NL-RO view creation. */
 volatile trx_id_t max_trx_id;
 
 /** Array of Read write transaction IDs
 for MVCC snapshot. A ReadView would take
 a snapshot of these transactions whose
 changes are not visible to it. We should
 remove transactions from the list before
 committing in memory and releasing locks
 to ensure right order of removal and
 consistent snapshot. */
 trx_ids_t rw_trx_ids; 

trx_sys_t->max_trx_id 用来储存当前未分配的最小事务编号,通过trx_sys_t->max_trx_id能快速获取到read view的m_low_limit_id初始值。

trx_sys->rw_trx_ids 用来存储当前活跃的事务链表,该全局事务链表按照事务ID逆序存放,最小的事务ID存放在链表尾部,通过trx_sys->rw_trx_ids 来初始化m_up_limit_id和m_ids。

read view结构

private:
  // Disable copying
  ReadView(const ReadView &);
  ReadView &operator=(const ReadView &);

 private:
  /** The read should not see any transaction with trx id >= this
  value. In other words, this is the "high water mark". */
  /* High water mark */
  trx_id_t m_low_limit_id;

  /** The read should see all trx ids which are strictly
  smaller (<) than this value.  In other words, this is the
  "low water mark". */
  /* Low water mark */
  trx_id_t m_up_limit_id;

  /** trx id of creating transaction, set to TRX_ID_MAX for free
  views. */
  /* ID of the current (creating) transaction */
  trx_id_t m_creator_trx_id;

  /** Set of RW transactions that was active when this snapshot
  was taken */
  /* IDs of the active transactions */
  ids_t m_ids;

  /** The view does not need to see the undo logs for transactions
  whose transaction number is strictly smaller (<) than this value:
  they can be removed in purge if not needed by other views */
  /* m_low_limit_no is used by the purge operation on the undo log */
  trx_id_t m_low_limit_no;

#ifdef UNIV_DEBUG
  /** The low limit number up to which read views don't need to access
  undo log records for MVCC. This could be higher than m_low_limit_no
  if purge is blocked for GTID persistence. Currently used for debug
  variable INNODB_PURGE_VIEW_TRX_ID_AGE. */
  trx_id_t m_view_low_limit_no;
#endif /* UNIV_DEBUG */

  /** AC-NL-RO transaction view that has been "closed". */
  bool m_closed;

  typedef UT_LIST_NODE_T(ReadView) node_t;

  /** List of read views in trx_sys */
  /* pad1 is sized as 64 minus sizeof(node_t) and sits directly ahead of the
  list node. NOTE(review): presumably this keeps m_view_list away from the
  hot fields above (cache-line padding) -- confirm against the upstream
  header. */
  byte pad1[64 - sizeof(node_t)];
  node_t m_view_list;
};

#endif

初始化read_view

/**
Prepares this view as a snapshot of the current point in time: exactly the
transactions serialized before this point are seen in the view.
The caller must own trx_sys->mutex (asserted in debug builds).
@param id		Creator transaction id */

void ReadView::prepare(trx_id_t id) {
  ut_ad(mutex_own(&trx_sys->mutex));

  m_creator_trx_id = id;

  /* Read the volatile counter a single time and seed all three limits from
  that one value. */
  const trx_id_t next_trx_id = trx_sys->max_trx_id;
  m_up_limit_id = next_trx_id;
  m_low_limit_id = next_trx_id;
  m_low_limit_no = next_trx_id;

  /* Snapshot the ids of the read-write transactions, or drop any stale
  snapshot when there are none. */
  if (trx_sys->rw_trx_ids.empty()) {
    m_ids.clear();
  } else {
    copy_trx_ids(trx_sys->rw_trx_ids);
  }

  ut_ad(m_up_limit_id <= m_low_limit_id);

  /* If the serialisation list is non-empty, its first entry may carry a
  smaller transaction number than the one taken above; keep the minimum. */
  if (UT_LIST_GET_LEN(trx_sys->serialisation_list) > 0) {
    const trx_t *head = UT_LIST_GET_FIRST(trx_sys->serialisation_list);

    if (head->no < m_low_limit_no) {
      m_low_limit_no = head->no;
    }
  }

  ut_d(m_view_low_limit_no = m_low_limit_no);
  m_closed = false;
}

判断记录是否可见

/* storage\innobase\include\read0types.h */

/** Check whether transaction id is valid. changes_visible() calls this for
ids that are neither below m_up_limit_id nor equal to m_creator_trx_id.
@param[in]	id		transaction id to check
@param[in]	name		table name */
static void check_trx_id_sanity(trx_id_t id, const table_name_t &name);

/** Decide whether the modifications made by transaction id are visible in
this view.
@param[in]	id	transaction id to check against the view
@param[in]	name	table name, forwarded to check_trx_id_sanity()
@return whether the view sees the modifications of id. */
bool changes_visible(trx_id_t id, const table_name_t &name) const
    MY_ATTRIBUTE((warn_unused_result)) {
  ut_ad(id > 0);

  /* Fast path: the view's own creator, or an id below the low water mark. */
  const bool is_own_change = (id == m_creator_trx_id);
  if (is_own_change || id < m_up_limit_id) {
    return true;
  }

  check_trx_id_sanity(id, name);

  /* At or above the high water mark: never visible. */
  if (id >= m_low_limit_id) {
    return false;
  }

  /* Between the two marks with no snapshotted ids: visible. */
  if (m_ids.empty()) {
    return true;
  }

  /* Otherwise the change is visible only if id is absent from the set of
  transactions that were active when the snapshot was taken. */
  const ids_t::value_type *const first = m_ids.data();
  const ids_t::value_type *const last = first + m_ids.size();
  const bool was_active = std::binary_search(first, last, id);

  return !was_active;
}

trx_undo_prev_version_build函数

trx_undo_prev_version_build函数用来通过undo log构造聚簇索引记录的上一个版本

/** Build a previous version of a clustered index record. The caller must hold
a latch on the index page of the clustered index record.
If the vrow passed to this function is not null, then this function will store
information about virtual columns from the requested version in vrow, unless the
change did not affect any secondary index nor ordering field of clustered index
(the change has UPD_NODE_NO_ORD_CHANGE flag) in which case the requested
information can not be reconstructed from undo log, and the caller may assume
that the (virtual) columns of secondary index have the same values they have in
the more recent version (the one `rec` comes from).
Equivalently, if the vrow is not returned, it is either because it was not
requested, or not available due to UPD_NODE_NO_ORD_CHANGE.
Obviously vrow is also not set in case rec is the oldest version in history,
in which case we also set old_vers to NULL.
@param[in]	index_rec	clustered index record in the index tree
@param[in]	index_mtr	mtr which contains the latch to index_rec page
                                and purge_view
@param[in]	rec		version of a clustered index record
@param[in]	index		clustered index
@param[in,out]	offsets		rec_get_offsets(rec, index)
@param[in]	heap		memory heap from which the memory needed is
                                allocated
@param[out]	old_vers	previous version, or NULL if rec is the first
                                inserted version, or if history data has been
                                deleted
@param[in]	v_heap		memory heap used to create vrow dtuple if it is
                                not yet created. This heap differs from "heap"
                                above in that it could be
                                prebuilt->old_vers_heap for selection
@param[out]	vrow		virtual column info, if any
@param[in]	v_status	bit flags: TRX_UNDO_PREV_IN_PURGE tells that
                                the function is fetching the record being
                                purged, and TRX_UNDO_GET_OLD_V_VALUE requests
                                that the old value (the "after image" of an
                                update) be stored into vrow
@param[in]	lob_undo	LOB undo information.
@retval true if previous version was built, or if it was an insert or the table
has been rebuilt
@retval false if the previous version is earlier than purge_view, or being
purged, which means that it may have been removed */
bool trx_undo_prev_version_build(const rec_t *index_rec, mtr_t *index_mtr,
                                 const rec_t *rec, const dict_index_t *index,
                                 ulint *offsets, mem_heap_t *heap,
                                 rec_t **old_vers, mem_heap_t *v_heap,
                                 const dtuple_t **vrow, ulint v_status,
                                 lob::undo_vers_t *lob_undo);

/* Definition of trx_undo_prev_version_build(). Outline of the steps below:
  1. read the roll pointer from rec; an insert roll pointer means rec is the
     first inserted version, so *old_vers stays nullptr and true is returned;
  2. fetch the undo record addressed by the roll pointer (returning false if
     it is unavailable and we are not called for purge);
  3. parse the undo record into an update vector;
  4. apply the update vector to a copy of rec, producing *old_vers;
  5. optionally fill *vrow with the virtual column values of that version.
index_rec and index_mtr are only used by debug assertions. */
bool trx_undo_prev_version_build(
    const rec_t *index_rec ATTRIB_USED_ONLY_IN_DEBUG,
    mtr_t *index_mtr ATTRIB_USED_ONLY_IN_DEBUG, const rec_t *rec,
    const dict_index_t *const index, ulint *offsets, mem_heap_t *heap,
    rec_t **old_vers, mem_heap_t *v_heap, const dtuple_t **vrow, ulint v_status,
    lob::undo_vers_t *lob_undo) {
  DBUG_TRACE;

  trx_undo_rec_t *undo_rec = nullptr;
  dtuple_t *entry;
  trx_id_t rec_trx_id;
  ulint type;
  undo_no_t undo_no;
  table_id_t table_id;
  trx_id_t trx_id;
  roll_ptr_t roll_ptr;
  upd_t *update = nullptr;
  byte *ptr;
  ulint info_bits;
  ulint cmpl_info;
  bool dummy_extern;
  byte *buf;

  /* Debug-only preconditions: purge_sys->latch is not S-locked by this
  thread, the page of index_rec is S- or X-fixed in index_mtr, and offsets
  describe rec. The clustered-index requirement is checked in all builds. */
  ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_S));
  ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX) ||
        mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_X_FIX));
  ut_ad(rec_offs_validate(rec, index, offsets));
  ut_a(index->is_clustered());

  /* Roll pointer read from rec; used below to locate the undo record. */
  roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);

  /* Default result for every early return below. */
  *old_vers = nullptr;

  if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
    /* The record rec is the first inserted version */
    return true;
  }

  rec_trx_id = row_get_rec_trx_id(rec, index, offsets);

  /* REDO rollback segments are used only for non-temporary objects.
  For temporary objects NON-REDO rollback segments are used. */
  bool is_temp = index->table->is_temporary();

  ut_ad(!index->table->skip_alter_undo);

  /* A true return value here is handled as "the undo record could not be
  obtained through the normal path". */
  if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, heap, is_temp,
                            index->table->name, &undo_rec)) {
    if (v_status & TRX_UNDO_PREV_IN_PURGE) {
      /* We are fetching the record being purged */
      undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap, is_temp);
    } else {
      /* The undo record may already have been purged,
      during purge or semi-consistent read. */
      return false;
    }
  }

  /* Parse the undo record header; ptr advances through the record as each
  section is consumed below. */
  type_cmpl_t type_cmpl;
  ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &dummy_extern,
                              &undo_no, &table_id, type_cmpl);

  if (table_id != index->table->id) {
    /* The table should have been rebuilt, but purge has
    not yet removed the undo log records for the
    now-dropped old table (table_id). */
    return true;
  }

  /* From here on trx_id and roll_ptr hold the values read from the undo
  record, not the ones stored in rec. */
  ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, &info_bits);

  /* (a) If a clustered index record version is such that the
  trx id stamp in it is bigger than purge_sys->view, then the
  BLOBs in that version are known to exist (the purge has not
  progressed that far);

  (b) if the version is the first version such that trx id in it
  is less than purge_sys->view, and it is not delete-marked,
  then the BLOBs in that version are known to exist (the purge
  cannot have purged the BLOBs referenced by that version
  yet).

  This function does not fetch any BLOBs.  The callers might, by
  possibly invoking row_ext_create() via row_build().  However,
  they should have all needed information in the *old_vers
  returned by this function.  This is because *old_vers is based
  on the transaction undo log records.  The function
  trx_undo_page_fetch_ext() will write BLOB prefixes to the
  transaction undo log that are at least as long as the longest
  possible column prefix in a secondary index.  Thus, secondary
  index entries for *old_vers can be constructed without
  dereferencing any BLOB pointers. */

  ptr = trx_undo_rec_skip_row_ref(ptr, index);

  /* Build the update vector that is applied to rec further down. */
  ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id, roll_ptr,
                                       info_bits, nullptr, heap, &update,
                                       lob_undo, type_cmpl);
  ut_a(ptr);

  if (row_upd_changes_field_size_or_external(index, offsets, update)) {
    /* We should confirm the existence of disowned external data,
    if the previous version record is delete marked. If the trx_id
    of the previous record is seen by purge view, we should treat
    it as missing history, because the disowned external data
    might be purged already.

    The inherited external data (BLOBs) can be freed (purged)
    after trx_id was committed, provided that no view was started
    before trx_id. If the purge view can see the committed
    delete-marked record by trx_id, no transactions need to access
    the BLOB. */

    /* the row_upd_changes_disowned_external(update) call could be
    omitted, but the synchronization on purge_sys->latch is likely
    more expensive. */

    if ((update->info_bits & REC_INFO_DELETED_FLAG) &&
        row_upd_changes_disowned_external(update)) {
      bool missing_extern;

      /* purge_sys->view is consulted only while holding purge_sys->latch in
      S mode. */
      rw_lock_s_lock(&purge_sys->latch);

      missing_extern =
          purge_sys->view.changes_visible(trx_id, index->table->name);

      rw_lock_s_unlock(&purge_sys->latch);

      if (missing_extern) {
        /* treat as a fresh insert, not to
        cause assertion error at the caller. */
        return true;
      }
    }

    /* We have to set the appropriate extern storage bits in the
    old version of the record: the extern bits in rec for those
    fields that update does NOT update, as well as the bits for
    those fields that update updates to become externally stored
    fields. Store the info: */

    entry = row_rec_to_index_entry(rec, index, offsets, heap);
    /* The page containing the clustered index record
    corresponding to entry is latched in mtr.  Thus the
    following call is safe. */
    row_upd_index_replace_new_col_vals(entry, index, update, heap);

    /* The old version is materialized in memory taken from heap. */
    buf = static_cast<byte *>(
        mem_heap_alloc(heap, rec_get_converted_size(index, entry)));

    *old_vers = rec_convert_dtuple_to_rec(buf, index, entry);
  } else {
    /* No field changes size or external storage: copy rec into heap memory
    and apply the update in place on the copy. */
    buf = static_cast<byte *>(mem_heap_alloc(heap, rec_offs_size(offsets)));

    *old_vers = rec_copy(buf, rec, offsets);
    rec_offs_make_valid(*old_vers, index, offsets);
    row_upd_rec_in_place(*old_vers, index, offsets, update, nullptr);
  }

  /* Set the old value (which is the after image of an update) in the
  update vector to dtuple vrow */
  /* NOTE(review): vrow is dereferenced here without a null check; presumably
  callers passing TRX_UNDO_GET_OLD_V_VALUE always supply a non-null vrow --
  confirm against the callers. */
  if (v_status & TRX_UNDO_GET_OLD_V_VALUE) {
    row_upd_replace_vcol((dtuple_t *)*vrow, index->table, update, false,
                         nullptr, nullptr);
  }

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
  ut_a(!rec_offs_any_null_extern(
      *old_vers,
      rec_get_offsets(*old_vers, index, nullptr, ULINT_UNDEFINED, &heap)));
#endif  // defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG

  /* If vrow is not NULL it means that the caller is interested in the values of
  the virtual columns for this version.
  If the UPD_NODE_NO_ORD_CHANGE flag is set on cmpl_info, it means that the
  change which created this entry in undo log did not affect any column of any
  secondary index (in particular: virtual), and thus the values of virtual
  columns were not recorded in undo. In such case the caller may assume that the
  values of (virtual) columns present in secondary index are exactly the same as
  they are in the next (more recent) version.
  If on the other hand the UPD_NODE_NO_ORD_CHANGE flag is not set, then we will
  make sure that *vrow points to a properly allocated memory and contains the
  values of virtual columns for this version recovered from undo log.
  This implies that if the caller has provided a non-NULL vrow, and the *vrow is
  still NULL after the call, (and old_vers is not NULL) it must be because the
  UPD_NODE_NO_ORD_CHANGE flag was set for this version.
  This last statement is an important assumption made by the
  row_vers_impl_x_locked_low() function. */
  if (vrow && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
    if (!(*vrow)) {
      /* Allocate the tuple from v_heap when one was supplied, otherwise from
      heap; the same choice is made for trx_undo_read_v_cols() below. */
      *vrow = dtuple_create_with_vcol(v_heap ? v_heap : heap,
                                      index->table->get_n_cols(),
                                      dict_table_get_n_v_cols(index->table));
      dtuple_init_v_fld(*vrow);
    }

    ut_ad(index->table->n_v_cols);
    trx_undo_read_v_cols(index->table, ptr, *vrow,
                         v_status & TRX_UNDO_PREV_IN_PURGE, false, nullptr,
                         (v_heap != nullptr ? v_heap : heap));
  }

  if (update != nullptr) {
    update->reset();
  }

  return true;
}
posted @ 2021-07-14 15:57  TeyGao  阅读(231)  评论(0编辑  收藏  举报