innodb事务锁的一些常见数据结构
lock_sys_t
整个innodb的锁系统管理结构体,定义在lock0lock.h中。在lock0lock.cc中有一个lock_sys_t的全局指针lock_sys, 由lock_sys_create分配。
/** The lock system struct: the top-level management object of the InnoDB
lock subsystem. */
struct lock_sys_t {
    /** Padding to prevent other memory update hotspots from residing on
    the same memory cache line. */
    char pad1[CACHE_LINE_SIZE];

    /** Mutex protecting the locks. */
    LockMutex mutex;

    /** Hash table of the record locks. */
    hash_table_t *rec_hash;

    /** Hash table of the predicate locks. */
    hash_table_t *prdt_hash;

    /** Hash table of the page locks. */
    hash_table_t *prdt_page_hash;

    /** Padding. */
    char pad2[CACHE_LINE_SIZE];

    /** Mutex protecting the next two fields (waiting_threads and
    last_slot). */
    LockMutex wait_mutex;

    /** Array of user threads suspended while waiting for locks within
    InnoDB. Protected by lock_sys->wait_mutex. */
    srv_slot_t *waiting_threads;

    /** Highest slot ever used in the waiting_threads array; it serves as
    the upper bound when the array is traversed. Protected by
    lock_sys->wait_mutex. */
    srv_slot_t *last_slot;

    /** TRUE if rollback of all recovered transactions is complete.
    Protected by lock_sys->mutex. */
    ibool rollback_complete;

    /** Max wait time. */
    ulint n_lock_max_wait_time;

    /** Event used by the thread that checks for lock wait timeouts. Set
    to the event that is created in the lock wait monitor thread; a value
    of 0 means the thread is not active. */
    os_event_t timeout_event;

    /** True if the timeout thread is running. */
    bool timeout_thread_active;
};
其中的rec_hash、prdt_hash、prdt_page_hash分别为记录锁(行锁)、谓词锁、页锁的哈希表。
lock_rec_t
描述记录锁的结构体,定义在lock0priv.h
/** Record lock descriptor for one page. */
struct lock_rec_t {
    /** Space id. */
    ib_uint32_t space;

    /** Page number. */
    ib_uint32_t page_no;

    /** Number of bits in the lock bitmap.
    NOTE: the lock bitmap is placed immediately after the lock struct. */
    ib_uint32_t n_bits;

    /** Print the record lock into the given output stream.
    @param[in,out]  out  the output stream
    @return the given output stream. */
    std::ostream &print(std::ostream &out) const;
};
记录锁通过space:page_no:heap_no来唯一确定,其中space和page_no用来计算哈希表的key。这个结构体实际分配的内存比结构体本身大:紧跟在结构体之后的是一个位图(共n_bits位),每一位对应该页上的一条记录,表示这条记录是否被该锁对象加锁,位的偏移量即该记录的heap_no。
lock_table_t
表锁结构体, 定义在lock0priv.h中
/** A table lock. */
struct lock_table_t {
    /** Database table in the dictionary cache that this lock refers to. */
    dict_table_t *table;

    /** List node chaining together the locks on the same table. */
    UT_LIST_NODE_T(lock_t) locks;

    /** Print the table lock into the given output stream.
    @param[in,out]  out  the output stream
    @return the given output stream. */
    std::ostream &print(std::ostream &out) const;
};
lock_t
定义于lock0priv.h中的通用锁结构体类型,各种类型的锁都可以用这种结构体表达,lock_sys_t中的哈希表存放的锁也是这种类型的锁结构体。
/** Lock struct; protected by lock_sys->mutex */
struct lock_t {
/*这个锁属于哪个事务*/
trx_t* trx; /*!< transaction owning the
lock */
/*事务中拥有的锁通过一个链表连接起来*/
UT_LIST_NODE_T(lock_t)
trx_locks; /*!< list of the locks of the
transaction */
dict_index_t* index; /*!< index for a record lock */
/*哈希表中同一个key值的节点使用链表连接起来*/
lock_t* hash; /*!< hash chain node for a record
lock. The link node in a singly linked
list, used during hashing. */
/*如果是表锁则为lock_table_t,如果是记录锁则为lock_rec_t,通过type_mode来判断类型*/
union {
lock_table_t tab_lock;/*!< table lock */
lock_rec_t rec_lock;/*!< record lock */
} un_member; /*!< lock details */
/*这个整数的各个位用于表达锁的类型,type_mode的各个位的定义在lock0lock.h中(LOCK_GAP等宏)*/
ib_uint32_t type_mode; /*!< lock type, mode, LOCK_GAP or
LOCK_REC_NOT_GAP,
LOCK_INSERT_INTENTION,
wait flag, ORed */
/** Determine if the lock object is a record lock.
@return true if record lock, false otherwise. */
bool is_record_lock() const
{
return(type() == LOCK_REC);
}
bool is_waiting() const
{
return(type_mode & LOCK_WAIT);
}
bool is_gap() const
{
return(type_mode & LOCK_GAP);
}
bool is_record_not_gap() const
{
return(type_mode & LOCK_REC_NOT_GAP);
}
bool is_insert_intention() const
{
return(type_mode & LOCK_INSERT_INTENTION);
}
ulint type() const {
return(type_mode & LOCK_TYPE_MASK);
}
enum lock_mode mode() const
{
return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
}
/** Print the lock object into the given output stream.
@param[in,out] out the output stream
@return the given output stream. */
std::ostream& print(std::ostream& out) const;
/** Convert the member 'type_mode' into a human readable string.
@return human readable string */
std::string type_mode_string() const;
const char* type_string() const
{
switch (type_mode & LOCK_TYPE_MASK) {
case LOCK_REC:
return("LOCK_REC");
case LOCK_TABLE:
return("LOCK_TABLE");
default:
ut_error;
}
}
};
trx_t
这个结构体代表一个事务,在锁系统中主要是成员trx_lock_t
struct trx_t {
···
trx_lock_t lock; /*!< Information about the transaction
locks and state. Protected by
trx->mutex or lock_sys->mutex
or both */
···
}
trx_lock_t
事务处理中与锁系统相关的部分,是trx_t的成员
/** The lock-system related part of a transaction; embedded in trx_t as the
member 'lock'. */
struct trx_lock_t {
    /** Number of active query threads. */
    ulint n_active_thrs;

    /** State of the transaction. Valid when
    trx->state == TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
    TRX_QUE_LOCK_WAIT, ... */
    trx_que_t que_state;

    /** The lock the transaction is currently requesting. If trx
    execution state is TRX_QUE_LOCK_WAIT, this points to the lock
    request, otherwise this is NULL. Set to non-NULL when holding both
    trx->mutex and lock_sys->mutex; set to NULL when holding
    lock_sys->mutex. Readers should hold lock_sys->mutex, except when
    they are holding trx->mutex and wait_lock==NULL. */
    lock_t *wait_lock;

    /** Visited marker for deadlock detection: a mark field that is
    initialized to and checked against lock_mark_counter by
    lock_deadlock_recursive(). */
    ib_uint64_t deadlock_mark;

    /** Whether this transaction was picked to be rolled back by deadlock
    resolution. When the transaction decides to wait for a lock, it sets
    this to false; if another transaction chooses this transaction as a
    victim in deadlock resolution, it sets this to true.
    Protected by trx->mutex. */
    bool was_chosen_as_deadlock_victim;

    /** Lock wait started at this time; protected only by
    lock_sys->mutex. */
    time_t wait_started;

    /** Query thread belonging to this trx that is in QUE_THR_LOCK_WAIT
    state. For threads suspended in a lock wait, this is protected by
    lock_sys->mutex. Otherwise, this may only be modified by the thread
    that is serving the running transaction. */
    que_thr_t *wait_thr;

    /** Pre-allocated record locks. */
    lock_pool_t rec_pool;

    /** Pre-allocated table locks. */
    lock_pool_t table_pool;

    /** Next free rec lock in pool. */
    ulint rec_cached;

    /** Next free table lock in pool. */
    ulint table_cached;

    /** Memory heap for trx_locks; protected by lock_sys->mutex. */
    mem_heap_t *lock_heap;

    /** Locks requested by the transaction. Insertions are protected by
    trx->mutex and lock_sys->mutex; removals are protected by
    lock_sys->mutex. */
    trx_lock_list_t trx_locks;

    /** All table locks requested by this transaction, including AUTOINC
    locks. */
    lock_pool_t table_locks;

    /** True if the transaction is being rolled back either via deadlock
    detection or due to lock timeout. The caller has to acquire the
    trx_t::mutex in order to cancel the locks. In
    lock_trx_table_locks_remove() we check for this cancel of a
    transaction's locks and avoid reacquiring the trx mutex to prevent
    recursive deadlocks. Protected by both the lock sys mutex and the
    trx_t::mutex. */
    bool cancel;

    /** Number of rec locks in this trx. */
    ulint n_rec_locks;

    /** The transaction called ha_innobase::start_stmt() to lock a table.
    Most likely a temporary table. */
    bool start_stmt;
};
DeadlockChecker
死锁检测类。当事务的加锁请求与已有的锁不兼容、需要进入等待时,通过这个类的静态接口check_and_resolve进行死锁检测。
/** Deadlock checker. */
class DeadlockChecker {
public:
/** Checks if a joining lock request results in a deadlock. If
a deadlock is found this function will resolve the deadlock
by choosing a victim transaction and rolling it back. It
will attempt to resolve all deadlocks. The returned transaction
id will be the joining transaction id or 0 if some other
transaction was chosen as a victim and rolled back or no
deadlock found.
@param lock lock the transaction is requesting
@param trx transaction requesting the lock
@return id of transaction chosen as victim or 0 */
/*外部调用接口*/
static const trx_t* check_and_resolve(
const lock_t* lock,
trx_t* trx);
private:
/*构造函数为私有函数,由静态成员函数check_and_resolve创建每次进行死锁检测的时候的DeadlockChecker类实例*/
/** Do a shallow copy. Default destructor OK.
@param trx the start transaction (start node)
@param wait_lock lock that a transaction wants
@param mark_start visited node counter */
DeadlockChecker(
const trx_t* trx,
const lock_t* wait_lock,
ib_uint64_t mark_start)
:
m_cost(),
m_start(trx),
m_too_deep(),
m_wait_lock(wait_lock),
m_mark_start(mark_start),
m_n_elems()
{
}
/** Check if the search is too deep. */
bool is_too_deep() const
{
return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
|| m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
}
/** Save current state.
@param lock lock to push on the stack.
@param heap_no the heap number to push on the stack.
@return false if stack is full. */
bool push(const lock_t* lock, ulint heap_no)
{
ut_ad((lock_get_type_low(lock) & LOCK_REC)
|| (lock_get_type_low(lock) & LOCK_TABLE));
ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
== (heap_no == ULINT_UNDEFINED));
/* Ensure that the stack is bounded. */
if (m_n_elems >= UT_ARR_SIZE(s_states)) {
return(false);
}
state_t& state = s_states[m_n_elems++];
state.m_lock = lock;
state.m_wait_lock = m_wait_lock;
state.m_heap_no =heap_no;
return(true);
}
/** Restore state.
@param[out] lock current lock
@param[out] heap_no current heap_no */
void pop(const lock_t*& lock, ulint& heap_no)
{
ut_a(m_n_elems > 0);
const state_t& state = s_states[--m_n_elems];
lock = state.m_lock;
heap_no = state.m_heap_no;
m_wait_lock = state.m_wait_lock;
}
/** Check whether the node has been visited.
@param lock lock to check
@return true if the node has been visited */
bool is_visited(const lock_t* lock) const
{
return(lock->trx->lock.deadlock_mark > m_mark_start);
}
/** Get the next lock in the queue that is owned by a transaction
whose sub-tree has not already been searched.
Note: "next" here means PREV for table locks.
@param lock Lock in queue
@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
@return next lock or NULL if at end of queue */
const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;
/** Get the first lock to search. The search starts from the current
wait_lock. What we are really interested in is an edge from the
current wait_lock's owning transaction to another transaction that has
a lock ahead in the queue. We skip locks where the owning transaction's
sub-tree has already been searched.
Note: The record locks are traversed from the oldest lock to the
latest. For table locks we go from latest to oldest.
For record locks, we first position the iterator on first lock on
the page and then reposition on the actual heap_no. This is required
due to the way the record lock has is implemented.
@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
@return first lock or NULL */
const lock_t* get_first_lock(ulint* heap_no) const;
/** Notify that a deadlock has been detected and print the conflicting
transaction info.
@param lock lock causing deadlock */
void notify(const lock_t* lock) const;
/** Select the victim transaction that should be rolledback.
@return victim transaction */
const trx_t* select_victim() const;
/** Rollback transaction selected as the victim. */
void trx_rollback();
/** Looks iteratively for a deadlock. Note: the joining transaction
may have been granted its lock by the deadlock checks.
@return 0 if no deadlock else the victim transaction.*/
const trx_t* search();
/** Print transaction data to the deadlock file and possibly to stderr.
@param trx transaction
@param max_query_len max query length to print */
static void print(const trx_t* trx, ulint max_query_len);
/** rewind(3) the file used for storing the latest detected deadlock
and print a heading message to stderr if printing of all deadlocks to
stderr is enabled. */
static void start_print();
/** Print lock data to the deadlock file and possibly to stderr.
@param lock record or table type lock */
static void print(const lock_t* lock);
/** Print a message to the deadlock file and possibly to stderr.
@param msg message to print */
static void print(const char* msg);
/** Print info about transaction that was rolled back.
@param trx transaction rolled back
@param lock lock trx wants */
static void rollback_print(const trx_t* trx, const lock_t* lock);
private:
/** DFS state information, used during deadlock checking. */
struct state_t {
const lock_t* m_lock; /*!< Current lock */
const lock_t* m_wait_lock; /*!< Waiting for lock */
ulint m_heap_no; /*!< heap number if rec lock */
};
/** Used in deadlock tracking. Protected by lock_sys->mutex. */
static ib_uint64_t s_lock_mark_counter;
/** Calculation steps thus far. It is the count of the nodes visited. */
ulint m_cost;
/** Joining transaction that is requesting a lock in an
incompatible mode */
const trx_t* m_start;
/** TRUE if search was too deep and was aborted */
bool m_too_deep;
/** Lock that trx wants */
const lock_t* m_wait_lock;
/** Value of lock_mark_count at the start of the deadlock check. */
ib_uint64_t m_mark_start;
/** Number of states pushed onto the stack */
size_t m_n_elems;
/** This is to avoid malloc/free calls. */
static state_t s_states[MAX_STACK_SIZE];
};