基于锁的并发数据结构

1. 使用细粒度锁和条件变量的线程安全队列

可以使用细粒度的锁来减小队列的临界区，这里使用了一个dummy node用来进一步减小锁的临界区。若要判断队列是否为空，只需要执行下述判断：

head.get() == get_tail()

请注意，因为在进行push的时候需要修改tail，所以对tail的访问和修改都需要进行加锁。这里使用get_tail来封装这个操作，将锁的粒度减小到最低。

// Read the tail pointer under tail_mutex.
// The lock is held only for the duration of the read, so the returned
// pointer is a snapshot of where the tail was at that moment.
node *get_tail()
{
    std::lock_guard<std::mutex> guard(tail_mutex);
    node *const tail_snapshot = tail;
    return tail_snapshot;
}

对push的操作只涉及到修改tail节点，所以只需要对tail节点进行加锁。加锁完成之后就可以修改tail使其指向新的tail节点。

// Append new_value to the back of the queue and wake one waiting consumer.
// The payload and the replacement dummy node are both allocated before
// tail_mutex is taken, so the critical section only rewires pointers.
void push(T new_value)
{
    std::shared_ptr<T> payload = std::make_shared<T>(std::move(new_value));
    std::unique_ptr<node> fresh_dummy(new node);
    node *const next_tail = fresh_dummy.get();
    {
      std::lock_guard<std::mutex> guard(tail_mutex);
      // The current dummy node becomes the real node carrying the value...
      tail->data = payload;
      // ...and the freshly allocated node takes over as the new dummy tail.
      tail->next = std::move(fresh_dummy);
      tail = next_tail;
    }
    // Notify after the lock has been released.
    data_cond.notify_one();
}

try_pop_head()用于应对这样一种需求：如果队列为空则直接返回一个空指针，不进行等待。其操作如下所示：

 std::unique_ptr<node> try_pop_head()
{
    std::lock_guard<std::mutex> head_lock(head_mutex);
    if (head.get() == get_tail())
    {
      return std::unique_ptr<node>();
    }
    return pop_head();
}

wait_and_pop()则会一直等待，直到成功从队列中弹出一个元素。这里使用了条件变量，避免线程循环进行忙等待。相应地，push()在插入元素之后需要通过条件变量通知正在等待的线程。

// Blocking pop: waits (inside wait_pop_head) until an element is available
// and returns the shared pointer stored in the detached head node.
std::shared_ptr<T> wait_and_pop()
{
    std::unique_ptr<node> const detached = wait_pop_head();
    std::shared_ptr<T> result = detached->data;
    return result;
}

// Wait for data, then detach the head node while still holding the head
// lock that wait_for_data() handed back.
std::unique_ptr<node> wait_pop_head()
{
    std::unique_lock<std::mutex> held_head_lock = wait_for_data();
    std::unique_ptr<node> detached = pop_head();
    return detached;
}

  // Block until the queue is non-empty and return the still-locked head lock,
  // so the caller can pop without another thread getting in between.
std::unique_lock<std::mutex> wait_for_data()
{
    std::unique_lock<std::mutex> head_lock(head_mutex);
    // The predicate form of wait() re-checks the condition after every
    // wake-up, so spurious wake-ups are handled here.
    data_cond.wait(head_lock, [&] { return head.get() != get_tail(); });
    // A local returned by name is moved implicitly; wrapping it in
    // std::move() only disables copy elision.
    return head_lock;
}

完整的代码如下所示：

#pragma once
#include <condition_variable>
#include <memory>
#include <mutex>
#include <utility>

/// Unbounded FIFO queue built on a singly linked list with two mutexes.
///
/// The list always ends in an empty dummy node: `head` owns the chain and
/// `tail` points at the dummy, so the queue is empty exactly when
/// `head.get() == tail`. `push()` takes only `tail_mutex`; the pop
/// operations take `head_mutex` and, through `get_tail()`, briefly
/// `tail_mutex`.
template<typename T>
class threadsafe_queue
{
 public:
  /// Create an empty queue consisting of just the dummy node.
  threadsafe_queue() :
      head(new node), tail(head.get()) {}

  /// Release all nodes one at a time.
  ///
  /// The implicit destructor would destroy the unique_ptr chain
  /// recursively (one stack frame group per node), which can overflow the
  /// stack for long queues. No lock is taken here: the queue must not be
  /// in use by other threads while it is being destroyed.
  ~threadsafe_queue()
  {
    while (head)
    {
      // Move assignment detaches the successor first, so the old head is
      // deleted with a null `next` and no recursion takes place.
      head = std::move(head->next);
    }
  }

  threadsafe_queue(const threadsafe_queue &) = delete;
  threadsafe_queue &operator=(const threadsafe_queue &) = delete;

  /// Non-blocking pop.
  /// @return the front element, or an empty shared_ptr if the queue is empty.
  std::shared_ptr<T> try_pop()
  {
    std::unique_ptr<node> const old_head = try_pop_head();
    return old_head ? old_head->data : std::shared_ptr<T>();
  }

  /// Non-blocking pop into `value`.
  /// @return true if an element was moved into `value`, false if the queue
  ///         was empty (in which case `value` is left untouched).
  bool try_pop(T &value)
  {
    std::unique_ptr<node> const old_head = try_pop_head(value);
    return old_head != nullptr;
  }

  /// Blocking pop: waits on the condition variable until an element exists.
  /// @return the front element.
  std::shared_ptr<T> wait_and_pop()
  {
    std::unique_ptr<node> const old_head = wait_pop_head();
    return old_head->data;
  }

  /// Blocking pop into `value`: waits until an element exists, then moves
  /// it into `value`.
  void wait_and_pop(T &value)
  {
    std::unique_ptr<node> const old_head = wait_pop_head(value);
  }

  /// Append `new_value` at the back and wake one waiting consumer.
  void push(T new_value)
  {
    // Both allocations happen before the lock is taken, so the critical
    // section below only rewires pointers.
    std::shared_ptr<T> new_data(std::make_shared<T>(std::move(new_value)));
    std::unique_ptr<node> p(new node);
    {
      std::lock_guard<std::mutex> tail_lock(tail_mutex);
      // The current dummy becomes the node carrying the value; the freshly
      // allocated node becomes the new dummy tail.
      tail->data = new_data;
      node *const new_tail = p.get();
      tail->next = std::move(p);
      tail = new_tail;
    }
    // Notify after tail_mutex has been released.
    data_cond.notify_one();
  }

  /// @return true if the queue held no element at the moment of the check.
  bool empty() const
  {
    std::lock_guard<std::mutex> head_lock(head_mutex);
    return (head.get() == get_tail());
  }

 private:
  struct node
  {
    std::shared_ptr<T> data;
    std::unique_ptr<node> next;
  };

  // Read the tail pointer under tail_mutex.
  node *get_tail() const
  {
    std::lock_guard<std::mutex> tail_lock(tail_mutex);
    return tail;
  }

  // Detach and return the current head node; its successor becomes the new
  // head. Every caller in this class holds head_mutex and has already
  // checked that the queue is not empty.
  std::unique_ptr<node> pop_head()
  {
    std::unique_ptr<node> old_head = std::move(head);
    head = std::move(old_head->next);
    return old_head;
  }

  // Block until the queue is non-empty and hand the still-locked head lock
  // back to the caller.
  std::unique_lock<std::mutex> wait_for_data()
  {
    std::unique_lock<std::mutex> head_lock(head_mutex);
    // The predicate form re-checks after every wake-up.
    data_cond.wait(head_lock, [this] { return head.get() != get_tail(); });
    // Returned by name: moved implicitly; std::move() here would only
    // inhibit copy elision.
    return head_lock;
  }

  // Wait for data, then pop the head node under the same lock.
  std::unique_ptr<node> wait_pop_head()
  {
    std::unique_lock<std::mutex> head_lock(wait_for_data());
    return pop_head();
  }

  // Wait for data, move the front value into `value`, then pop the node.
  // The assignment happens before the node is unlinked, so if it throws
  // the element stays in the queue.
  std::unique_ptr<node> wait_pop_head(T& value)
  {
    std::unique_lock<std::mutex> head_lock(wait_for_data());
    value = std::move(*head->data);
    return pop_head();
  }

  // Pop the head node if there is one; otherwise return an empty pointer.
  std::unique_ptr<node> try_pop_head()
  {
    std::lock_guard<std::mutex> head_lock(head_mutex);
    if (head.get() == get_tail())
    {
      return std::unique_ptr<node>();
    }
    return pop_head();
  }

  // Like try_pop_head(), but first moves the front value into `value`.
  std::unique_ptr<node> try_pop_head(T &value)
  {
    std::lock_guard<std::mutex> head_lock(head_mutex);
    if (head.get() == get_tail())
    {
      return std::unique_ptr<node>();
    }
    value = std::move(*head->data);
    return pop_head();
  }

  mutable std::mutex head_mutex; // guards head
  std::unique_ptr<node> head; // owns the node chain
  mutable std::mutex tail_mutex; // guards tail
  node *tail; // points at the trailing dummy node
  std::condition_variable data_cond; // signalled by push()
};

2. 线程安全hash表

线程安全的hash表是另一个用于展示细粒度锁同步的很好的例子。在hash实现之中，使用了基于桶的开链hash实现。每个桶对应的链表可以统一使用同一个锁进行访问控制。对链表的修改需要使用写锁进行排他的访问控制，对链表的访问则使用读锁进行保护，这样就充分利用了读锁和写锁的区别，将锁的粒度降到最低，减少可能的数据竞争。

下面的代码展示了bucket_type的用法：

class bucket_type
{
 public:
  Value value_for(Key const& key, Value const& default_value) const
  {
    // read 需要加读锁
    boost::shared_lock<boost::shared_mutex> lock(mutex);
    const_bucket_iterator found_entry = find_entry_for(key);
    return (found_entry == data.end()) ? default_value:found_entry->second;
  }

  void add_or_update_mapping(Key const& key, Value const& value)
  {
    // 需要加写锁
    std::unique_lock<boost::shared_mutex> lock(mutex);
    bucket_iterator found_entry = find_entry_for(key);
    if(found_entry == data.end())
    {
      data.push_back(bucket_value(key, value));
    }
    else
    {
      found_entry->second = value;
    }
  }

  void remove_mapping(Key const& key)
  {
    // 需要加写锁
    std::unique_lock<boost::shared_mutex> lock(mutex);
    const_bucket_iterator found_entry = find_entry_for(key);
    if(found_entry != data.end())
    {
      data.erase(found_entry);
    }
  }

 private:
  typedef std::pair<Key, Value> bucket_value;
  typedef std::list<bucket_value> bucket_data;
  typedef typename bucket_data::const_iterator const_bucket_iterator;
  typedef typename bucket_data::iterator bucket_iterator;
  bucket_data data;
  mutable boost::shared_mutex mutex;

  const_bucket_iterator find_entry_for(Key const& key) const
  {
    return std::find_if(data.begin(),data.end(),
                        [&](bucket_value const& item)
                        {return item.first==key;});
  }

  bucket_iterator find_entry_for(Key const& key)
  {
    return std::find_if(data.begin(), data.end(), [&](bucket_value const& item) { return item.first == key; });
  }
};

上述代码体现了读锁和写锁的区别：只有在修改链表的时候才使用写锁进行排他访问，以保证一致性；在只读访问链表的时候使用读锁，读锁之间互不排斥，因此允许多个线程同时读取，而读锁与写锁之间仍然互斥。

多个hash桶就组合成了一个hash table。根据hash规则拿到对应的hash桶，再对桶内的链表进行读写操作。

// Bucket storage: each element owns one bucket_type. get_bucket() selects
// an element with hasher(key) % buckets.size().
std::vector<std::unique_ptr<bucket_type>> buckets;

// Map `key` to the bucket responsible for it.
// No lock is taken here; the original note states that selecting a bucket
// needs none (presumably because `buckets` is never resized after
// construction -- confirm against the constructor).
bucket_type& get_bucket(Key const& key) const
{
    std::size_t const bucket_count = buckets.size();
    std::size_t const slot = hasher(key) % bucket_count;
    return *buckets[slot];
}

hash表剩余的操作就是对bucket内置函数的转调用。每个bucket有自己的读写锁进行访问控制。

// Look `key` up in its bucket; yields `default_value` when it is absent.
// Locking is done inside the bucket.
Value value_for(Key const& key, Value const& default_value=Value()) const
{
    bucket_type const& owner = get_bucket(key);
    return owner.value_for(key, default_value);
}

// Insert or overwrite the mapping for `key` in its bucket.
// Locking is done inside the bucket.
void add_or_update_mapping(Key const& key, Value const& value)
{
    bucket_type& owner = get_bucket(key);
    owner.add_or_update_mapping(key, value);
}

// Remove the mapping for `key` from its bucket, if present.
// Locking is done inside the bucket.
void remove_mapping(Key const& key)
{
    bucket_type& owner = get_bucket(key);
    owner.remove_mapping(key);
}

3. 线程安全链表

对于线程安全的链表，同样使用dummy node来标志链表的开头位置。注意在遍历链表时，对某个节点进行操作的时候一定要持有该节点的锁，并且要先拿到下一个节点的锁，再释放当前节点的锁（hand-over-hand locking），就像这样：

 template<typename Function>
void for_each(Function f)
{
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    node* next;
    while((next = current->next.get()) != NULL)
    {
      std::unique_lock<std::mutex> next_lk(next->m);
      // unlock node
      lk.unlock();
      f(*next->data);
      current=next;
      lk = std::move(next_lk);
    }
}

template<typename Predicate>
std::shared_ptr<T> find_first_if(Predicate p)
{
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    while(node* const next = current->next.get())
    {
      std::unique_lock<std::mutex> next_lk(next->m);
      lk.unlock();
      if(p(*next->data))
      {
        return next->data;
      }
      current = next;
      lk = std::move(next_lk);
    }
    return std::shared_ptr<T>();
}

要注意的是，remove操作需要同时持有前后两个节点的锁，这样才能保证重新设置前后节点的时候对应节点不被修改。

template<typename Predicate>
void remove_if(Predicate p)
{
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    while(node* const next = current->next.get())
    {
      std::unique_lock<std::mutex> next_lk(next->m);
      if(p(*next->data))
      {
        // store old_next node
        // 保证old_next在析构之前其持有的锁已经被解锁
        std::unique_ptr<node> old_next = std::move(current->next);
        current->next = std::move(next->next);
        next_lk.unlock();
      }
      else
      {
        lk.unlock();
        current = next;
        lk = std::move(next_lk);
      }
    }
}

对于整个链表的节点的析构也是借助remove_if完成的。

 ~threadsafe_list()
{
    // Remove every node from the list.
    // remove_if() invokes the predicate with the element (`*next->data`,
    // a T), so the predicate takes `T const&`. Taking `node const&` only
    // compiled through the implicit node(T const&) conversion, which built
    // a temporary node -- and copied the element -- for each entry.
    remove_if([](T const &){ return true; });
}

完整的链表实现代码如下所示：

#include <memory>
#include <mutex>
#include <utility>

/// Singly linked list with one mutex per node.
///
/// `head` is a dummy node that stores no data and marks the start of the
/// list. Traversals lock the next node before releasing the previous one,
/// so several threads can work on different parts of the list at once.
template<typename T>
class threadsafe_list
{
 public:
  threadsafe_list()
  { }

  /// Remove every node from the list.
  ~threadsafe_list()
  {
    // remove_if() passes the element (a T) to the predicate. remove_if()
    // unlinks nodes one at a time, so destruction is not recursive.
    remove_if([](T const &){ return true; });
  }

  // no copying
  threadsafe_list(threadsafe_list const&) = delete;
  threadsafe_list& operator=(threadsafe_list const&) = delete;

  /// Insert a copy of `value` at the front of the list.
  void push_front(T const& value)
  {
    // Allocate before locking; only the pointer splice runs under the lock.
    std::unique_ptr<node> new_node(new node(value));
    std::lock_guard<std::mutex> lk(head.m);
    new_node->next = std::move(head.next);
    head.next = std::move(new_node);
  }

  /// Call `f` on every element, front to back. `f` runs while the visited
  /// node's mutex is held.
  template<typename Function>
  void for_each(Function f)
  {
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    while(node* const next = current->next.get())
    {
      std::unique_lock<std::mutex> next_lk(next->m);
      // The next node is locked, so the previous one can be released.
      lk.unlock();
      f(*next->data);
      current = next;
      lk = std::move(next_lk);
    }
  }

  /// Return the data pointer of the first element for which `p` is true,
  /// or an empty shared_ptr when there is none.
  template<typename Predicate>
  std::shared_ptr<T> find_first_if(Predicate p)
  {
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    while(node* const next = current->next.get())
    {
      std::unique_lock<std::mutex> next_lk(next->m);
      lk.unlock();
      if(p(*next->data))
      {
        return next->data;
      }
      current = next;
      lk = std::move(next_lk);
    }
    return std::shared_ptr<T>();
  }

  /// Unlink and destroy every element for which `p` is true.
  template<typename Predicate>
  void remove_if(Predicate p)
  {
    node* current = &head;
    std::unique_lock<std::mutex> lk(head.m);
    while(node* const next = current->next.get())
    {
      // Both the predecessor (lk) and the candidate (next_lk) are locked
      // while the candidate is examined and possibly unlinked.
      std::unique_lock<std::mutex> next_lk(next->m);
      if(p(*next->data))
      {
        // Take ownership of the node so it survives the relinking, and
        // unlock its mutex before old_next destroys it at the end of this
        // scope.
        std::unique_ptr<node> old_next = std::move(current->next);
        current->next = std::move(next->next);
        next_lk.unlock();
      }
      else
      {
        lk.unlock();
        current = next;
        lk = std::move(next_lk);
      }
    }
  }

 private:
  struct node
  {
    std::mutex m;
    std::shared_ptr<T> data;
    std::unique_ptr<node> next;

    // Dummy node: no data, no successor.
    node():
        m(),
        data(),
        next()
    { }

    // explicit: a T must never silently convert into a node.
    explicit node(T const& value):
        m(),
        data(std::make_shared<T>(value)),
        next()
    { }
  };

  // Dummy node marking the start of the list; it stores no data.
  node head;
};

《C++ 并发编程实战》

posted @ 2017-05-30 21:32 ZHOU YANG 阅读(1359) 评论(0) 编辑收藏举报

刷新页面返回顶部

ZHOU YANG

基于锁的并发数据结构

1. 使用细粒度锁和条件变量的线程安全队列

2. 线程安全hash表

3. 线程安全链表

公告