Lock Free 之 Hazard Pointer
Hazard Pointer
先看个例子:
// Heap-allocated integer shared by reader() and writer() with no
// synchronization; it is the object the race in this example is about.
int *p = new int(2);
// Prints the integer the global pointer p refers to, if p is non-null.
// NOTE: the null check and the dereference are two separate steps with no
// synchronization, so writer() running on another thread can delete p in
// between them. That window is the defect this example demonstrates.
void reader() {
if (nullptr != p) { // nullptr was introduced in C++11
cout << *p << endl;
}
}
// Frees the object behind the global pointer p and then resets p to nullptr.
// Nothing prevents reader() from having already passed its null check when
// the delete happens.
void writer() {
delete p;
p = nullptr;
}
// Runs reader() and writer() on two threads and waits for both to finish.
// The two threads touch the global p concurrently without any locking.
int main() {
thread t1(reader);
thread t2(writer);
t1.join();
t2.join();
return 0;
}
这个例子中,如果线程 reader 判断完 p 不是 nullptr 之后、还未执行下一行就被调度出去,轮到线程 writer 执行 delete p 并把 p 置为 nullptr;之后再次调度到线程 reader 时,执行 cout << *p << endl 就会访问已经被释放的内存(或者解引用空指针),导致程序崩溃。
通常多线程同步可以通过加锁解决,不过本文讨论的是lock free情况下的内存管理,下面是一个 Hazard Pointer(风险指针)的例子:
// Maximum number of distinct threads that may ever use one HazardPointer<T>
// instantiation. Thread slots are handed out once and never recycled.
#define MAX_THREAD_NUM 503
#define FETCH_AND_ADD(address, offset) __sync_fetch_and_add(address, offset)
#define CAS(address, oldValue, newValue) __sync_bool_compare_and_swap(address, oldValue, newValue)
// Spin-wait hint. Falls back to a plain compiler barrier on targets that have
// neither the x86 `pause` nor the ARM `yield` instruction.
#if defined(__i386__) || defined(__x86_64__)
#define CPU_RELAX() __asm__ __volatile__("pause\n" : : : "memory")
#elif defined(__aarch64__) || defined(__arm__)
#define CPU_RELAX() __asm__ __volatile__("yield" : : : "memory")
#else
#define CPU_RELAX() __asm__ __volatile__("" : : : "memory")
#endif
// Forces a single real load/store of x. The argument is parenthesized so that
// any lvalue expression can be passed.
#define ACCESS_ONCE(x) (*((volatile __typeof__(x) *)&(x)))
// Fallback for builds that do not provide their own cache-line alignment macro.
#ifndef CACHE_ALIGNED
#define CACHE_ALIGNED __attribute__((aligned(64)))
#endif

/**
 * Hazard-pointer based safe memory reclamation for lock-free containers.
 *
 * Every thread owns two private linked lists:
 *   - hp_list_     : pointers the thread is currently dereferencing ("hazards");
 *   - retire_list_ : pointers the thread has logically removed and wants freed.
 * Only the owning thread writes its lists, so no locking is needed. gc() reads
 * the hazard lists of the other threads to decide what may be deleted.
 *
 * Usage protocol for readers: acquire(p), then re-validate that p is still
 * reachable, use it, then release(p). Writers unlink the object, then call
 * retire(p) and, at some point, gc().
 *
 * Limits: at most MAX_THREAD_NUM threads may use one HazardPointer<T>
 * instantiation; operations from further threads fail with -1 instead of
 * touching memory outside the per-thread arrays.
 *
 * List nodes are never freed while the object is alive (they are reused);
 * everything, including objects still waiting in a retire list, is released by
 * the destructor. The destructor must not run concurrently with any other call.
 * The class is non-copyable because it owns those lists.
 */
template <typename T>
class HazardPointer {
 private:
  struct HazardPointerNode {
    T *p;
    HazardPointerNode *next;
  };

  // Per-thread list. The first node is embedded so the common case of one
  // hazard per thread needs no allocation.
  struct HazardPointerList {
    int16_t len;  // number of heap-allocated nodes in the list
    int16_t num;  // number of nodes currently holding a pointer
    HazardPointerNode list;
  } CACHE_ALIGNED;

 public:
  HazardPointer();
  ~HazardPointer();
  HazardPointer(const HazardPointer &) = delete;
  HazardPointer &operator=(const HazardPointer &) = delete;

  /// Marks p as in use by the calling thread so gc() on other threads will
  /// not free it. A null p is a no-op. Returns 0 on success, -1 if the calling
  /// thread is beyond MAX_THREAD_NUM.
  int acquire(T *p);

  /// Clears one hazard entry for p owned by the calling thread. A null p is a
  /// no-op returning 0. Returns -1 if p is not currently protected by this
  /// thread or the thread is beyond MAX_THREAD_NUM.
  int release(T *p);

  /// Logical delete: hands ownership of p to the calling thread's retire list.
  /// A null p is a no-op. Returns 0 on success, -1 if the calling thread is
  /// beyond MAX_THREAD_NUM (p is then NOT owned by this object).
  int retire(T *p);

  /// Physical delete: once the calling thread has at least NUM_TO_RECLAIM_P
  /// retired pointers, deletes every one of them that no OTHER thread has
  /// published as a hazard. Returns 0, or -1 if the calling thread is beyond
  /// MAX_THREAD_NUM.
  int gc();

 private:
  // Stores p into the calling thread's entry of `list` (hp_list_ or
  // retire_list_), reusing an empty node when possible.
  int help(HazardPointerList *list, T *p);

  // Returns true if any thread other than self_id currently publishes
  // candidate in its hazard list.
  bool is_protected_elsewhere(T *candidate, int self_id);

  // Frees all heap nodes of every per-thread list in `lists`, optionally the
  // objects they still point to, and finally the array itself.
  static void free_lists(HazardPointerList *lists, bool delete_payload);

  // Lazily assigns the calling thread the next free slot index.
  int get_thread_id() {
    static __thread int id = -1;
    if (id == -1) {
      id = FETCH_AND_ADD(&thread_count_, 1);
    }
    return id;
  }

  static bool is_valid_thread_id(int id) { return id >= 0 && id < MAX_THREAD_NUM; }

 private:
  // Minimum retire-list length that triggers a scan, so the cost of walking
  // every thread's hazard list is amortized over several retirements.
  static constexpr int16_t NUM_TO_RECLAIM_P = 10;

 private:
  HazardPointerList *hp_list_;      // per-thread hazards, indexed by thread id
  HazardPointerList *retire_list_;  // per-thread retired pointers, indexed by thread id
  static int thread_count_;
};

template <typename T>
HazardPointer<T>::HazardPointer()
    // The trailing () value-initializes every element: counters are 0 and all
    // pointers are null.
    : hp_list_(new HazardPointerList[MAX_THREAD_NUM]()),
      retire_list_(new HazardPointerList[MAX_THREAD_NUM]()) {}

template <typename T>
HazardPointer<T>::~HazardPointer() {
  // Retired objects are owned by this manager; nobody else will free them.
  free_lists(retire_list_, true);
  // Hazard entries only borrow their pointers, so just the nodes are freed.
  free_lists(hp_list_, false);
}

template <typename T>
int HazardPointer<T>::thread_count_ = 0;

template <typename T>
void HazardPointer<T>::free_lists(HazardPointerList *lists, bool delete_payload) {
  for (int i = 0; i < MAX_THREAD_NUM; i++) {
    if (delete_payload) {
      delete lists[i].list.p;  // embedded head node; deleting null is fine
    }
    HazardPointerNode *node = lists[i].list.next;
    while (node != nullptr) {
      HazardPointerNode *following = node->next;
      if (delete_payload) {
        delete node->p;
      }
      delete node;
      node = following;
    }
  }
  delete[] lists;
}

template <typename T>
int HazardPointer<T>::help(HazardPointerList *list, T *p) {
  if (p == nullptr) {
    return 0;
  }
  const int thread_id = get_thread_id();
  if (!is_valid_thread_id(thread_id)) {
    return -1;  // more threads than slots: refuse rather than write out of bounds
  }
  HazardPointerList &mine = list[thread_id];

  // Prefer reusing an empty node of the calling thread's own list.
  for (HazardPointerNode *node = &mine.list; node != nullptr; node = node->next) {
    if (node->p == nullptr) {
      ACCESS_ONCE(node->p) = p;
      mine.num++;
      return 0;
    }
  }

  // No free node: allocate one (std::bad_alloc propagates on failure) and push
  // it right behind the embedded head. The node is completely initialized and
  // fenced BEFORE it is linked, because gc() on another thread may be walking
  // this list concurrently.
  HazardPointerNode *fresh = new HazardPointerNode();
  fresh->p = p;
  fresh->next = mine.list.next;
  __sync_synchronize();
  ACCESS_ONCE(mine.list.next) = fresh;
  mine.len++;
  mine.num++;
  return 0;
}

template <typename T>
int HazardPointer<T>::acquire(T *p) {
  const int rc = help(hp_list_, p);
  // Full fence: the hazard must be globally visible before the caller
  // re-reads the shared pointer to validate it (store -> load ordering).
  __sync_synchronize();
  return rc;
}

template <typename T>
int HazardPointer<T>::release(T *p) {
  if (p == nullptr) {
    // acquire(nullptr) stores nothing, so there is nothing to undo. Searching
    // for a null entry would hit an empty node and corrupt `num`.
    return 0;
  }
  const int thread_id = get_thread_id();
  if (!is_valid_thread_id(thread_id)) {
    return -1;
  }
  HazardPointerList &mine = hp_list_[thread_id];
  for (HazardPointerNode *node = &mine.list; node != nullptr; node = node->next) {
    if (node->p == p) {
      // Make sure every access to *p has completed before the protection is
      // dropped.
      __sync_synchronize();
      ACCESS_ONCE(node->p) = nullptr;
      mine.num--;
      return 0;
    }
  }
  return -1;
}

template <typename T>
int HazardPointer<T>::retire(T *p) {
  return help(retire_list_, p);
}

template <typename T>
bool HazardPointer<T>::is_protected_elsewhere(T *candidate, int self_id) {
  for (int i = 0; i < MAX_THREAD_NUM; i++) {
    if (i == self_id) {
      continue;
    }
    for (HazardPointerNode *node = &hp_list_[i].list; node != nullptr;
         node = ACCESS_ONCE(node->next)) {
      if (ACCESS_ONCE(node->p) == candidate) {
        return true;
      }
    }
  }
  return false;
}

template <typename T>
int HazardPointer<T>::gc() {
  const int thread_id = get_thread_id();
  if (!is_valid_thread_id(thread_id)) {
    return -1;
  }
  HazardPointerList &mine = retire_list_[thread_id];
  if (mine.num < NUM_TO_RECLAIM_P) {
    return 0;
  }

  // Order the caller's earlier unlink of the retired objects before the reads
  // of other threads' hazard lists below. Those lists are read without locks:
  // each node is written only by its owner and published with a fence.
  __sync_synchronize();

  for (HazardPointerNode *node = &mine.list; node != nullptr; node = node->next) {
    T *candidate = node->p;
    if (candidate == nullptr) {
      continue;
    }
    if (is_protected_elsewhere(candidate, thread_id)) {
      continue;  // still in use somewhere; retry on a later gc()
    }
    delete candidate;
    node->p = nullptr;
    mine.num--;
  }
  return 0;
}
这里:
- 对于读线程,acquire() 方法会把要读的指针放到 hp_list 中(此时gc() 线程不会将其释放), release() 方法 将 hp_list 对应指针置0(之后 gc() 线程可以释放);
- 对于写线程,retire() 方法会把要释放的指针放到 retire_list 中(逻辑删除队列),最终物理删除是依靠 gc() 方法,gc 线程会检查 retire_list 中的指针,如果其中的指针在其它所有线程的 hp_list 都不存在,才真正释放;
注意:
- 每个线程拥有并管理自己的 retire list 和 hazard pointer list,这样可以避免多线程并发维护这些 list 所需的同步开销;否则我们又得绞尽脑汁去设计另外一套 lock free 的策略来管理这些 list,陷入“先有鸡还是先有蛋”的循环,无穷无尽;
- 每个线程负责回收自己的 retire list 中记录维护的内存,只有当 retire list 的大小(数量)达到一定的阈值时,才进行GC,这样可以把GC的开销进行分摊;
还有个问题:Hazard Pointer 本身(即 list 中的节点)的内存只分配、不释放。在 stack、queue 等数据结构里,每个线程需要的 Hazard Pointer 数量一般为 1(栈顶)或者 2(队头、队尾),所以不释放问题不大。但对于 skip list 这种有遍历需求的数据结构,遍历过程中需要保护的节点数量可能很多,Hazard Pointer 可能就不是非常适用了,可以考虑使用 Epoch Based Reclamation 技术。
下面的例子是基于 Hazard Pointer 实现的一个无锁队列:
1 template<class T> 2 class AdConcurrentMSQueue { 3 private: 4 struct QueueCell { 5 ~QueueCell() {} 6 T element_; 7 QueueCell *next_; 8 }; 9 10 struct HazardPointerGuard { 11 HazardPointerGuard(AdHazardPointer<QueueCell> &manager, QueueCell *p): manager_(manager), p_(p) { 12 manager_.acquire(p_); 13 } 14 ~HazardPointerGuard() { 15 manager_.release(p_); 16 } 17 AdHazardPointer<QueueCell> &manager_; 18 QueueCell *p_; 19 }; 20 public: 21 AdConcurrentMSQueue(); 22 AdConcurrentMSQueue(const AdConcurrentMSQueue &other) = delete; 23 AdConcurrentMSQueue& operator = (const AdConcurrentMSQueue &other) = delete; 24 bool put(const T &p); 25 bool get(T &p); 26 int64_t size(); 27 int64_t size_approx() { 28 return size(); 29 } 30 private: 31 void spin(); 32 private: 33 QueueCell *volatile head_; 34 QueueCell *volatile tail_; 35 QueueCell *dummy_node_; 36 AdHazardPointer<QueueCell> memory_manager_; 37 int64_t pop_count_; 38 int64_t push_count_; 39 }; 40 41 42 template<class T> 43 AdConcurrentMSQueue<T>::AdConcurrentMSQueue() { 44 dummy_node_ = new QueueCell(); 45 dummy_node_->next_ = nullptr; 46 head_ = dummy_node_; 47 tail_ = dummy_node_; 48 pop_count_ = 0; 49 push_count_ = 0; 50 } 51 52 template<class T> 53 bool AdConcurrentMSQueue<T>::put(const T &p) 54 { 55 QueueCell *cell = new QueueCell(); 56 if (cell == nullptr) { 57 return false; 58 } 59 cell->element_ = p; 60 cell->next_ = nullptr; 61 QueueCell *volatile tail = nullptr; 62 while (true) { 63 tail = tail_; 64 HazardPointerGuard record_tail(memory_manager_, tail); 65 if (tail == tail_) { 66 QueueCell *next = tail->next_; 67 if (next == nullptr) { 68 if (CAS(&tail->next_, next, cell)) { 69 CAS(&tail_, tail, cell); 70 break; 71 } else { 72 spin(); 73 } 74 } else { 75 CAS(&tail_, tail, next); 76 } 77 } else { 78 spin(); 79 } 80 } 81 FETCH_AND_ADD(&push_count_, 1); 82 return true; 83 } 84 85 86 template<class T> 87 bool AdConcurrentMSQueue<T>::get(T &element) { 88 QueueCell *volatile tail = nullptr; 89 QueueCell *volatile head = 
nullptr; 90 QueueCell *next = nullptr; 91 while (true) { 92 tail = tail_; 93 HazardPointerGuard record_tail(memory_manager_, tail); 94 if (tail != tail_) { 95 continue; 96 } 97 head = head_; 98 HazardPointerGuard record_head(memory_manager_, head); 99 if (head != head_) { 100 continue; 101 } else { 102 next = head->next_; 103 if (head == tail) { 104 if (next == nullptr) { 105 return false; 106 } else { 107 CAS(&tail_, tail, next); 108 } 109 } else { 110 HazardPointerGuard record_next(memory_manager_, next); 111 if (next != head->next_) { 112 continue; 113 } 114 element = next->element_; 115 if (CAS(&head_, head, next)) { 116 break; 117 } else { 118 spin(); 119 } 120 } 121 } 122 } 123 memory_manager_.retire(head); 124 memory_manager_.gc(); 125 FETCH_AND_ADD(&pop_count_, 1); 126 return true; 127 } 128 129 130 template<class T> 131 void AdConcurrentMSQueue<T>::spin() { 132 static const int64_t INIT_LOOP = 1000000; 133 static const int64_t MAX_LOOP = 8000000; 134 static __thread int64_t delay = 0; 135 if (delay <= 0) { 136 delay = INIT_LOOP; 137 } 138 for (int64_t i = 0; i < delay; i++) { 139 CPU_RELAX(); 140 } 141 int64_t new_delay = delay << 1LL; 142 if (new_delay <= 0 || new_delay >= MAX_LOOP) { 143 new_delay = INIT_LOOP; 144 } 145 delay = new_delay; 146 } 147 148 template<class T> 149 int64_t AdConcurrentMSQueue<T>::size() { 150 return ACCESS_ONCE(push_count_) - ACCESS_ONCE(pop_count_); 151 }
参考:
http://www.yebangyu.org/blog/2015/12/10/introduction-to-hazard-pointer/
http://www.yebangyu.org/blog/2016/12/04/introductiontohazardpointer/
======专注高性能web服务器架构和开发=====