【ceph】SimpleMessenger通信代码分析
ceph 版本:ceph-12.0.0
原理和源码分析:https://ivanzz1001.github.io/records/post/ceph/2019/01/05/ceph-src-code-part3_1
2.1 server端代码
实例代码:ceph-12.0.0\src\test\messenger\simple_server.cc
// Entry point of the simple_server example (excerpt; "……" marks code that the
// article elided). It builds a "tcp://<addr>:<port>" URL, creates a Messenger,
// binds it to the parsed address, registers a SimpleDispatcher, starts the
// messenger and then blocks in wait() before deleting the messenger.
int main(int argc, const char **argv)
{
vector<const char*> args;
Messenger *messenger;
Dispatcher *dispatcher;
……
// Assemble the listen URL and parse it into bind_addr.
string dest_str = "tcp://";
dest_str += addr;
dest_str += ":";
dest_str += port;
entity_addr_from_url(&bind_addr, dest_str.c_str());
// The configured ms_type is handed to the factory together with the entity
// name MON(-1) and the messenger name "simple_server".
messenger = Messenger::create(g_ceph_context, g_conf->ms_type,
entity_name_t::MON(-1),
"simple_server",
0 /* nonce */,
0 /* flags */);
……
r = messenger->bind(bind_addr);
// A negative return value means bind failed; jump to the `out` label
// (the label itself is in the elided part of the listing).
if (r < 0)
goto out;
// Set up crypto, daemonize, etc.
//global_init_daemonize(g_ceph_context, 0);
common_init_finish(g_ceph_context);
dispatcher = new SimpleDispatcher(messenger);
messenger->add_dispatcher_head(dispatcher); // should reach ready()
messenger->start();
messenger->wait(); // can't be called until ready()
// done
delete messenger;
……
}
- 将地址字符串解析为entity_addr_t类的地址
// Build the "tcp://<addr>:<port>" URL string.
string dest_str = "tcp://";
dest_str += addr;
dest_str += ":";
dest_str += port;
entity_addr_from_url(&bind_addr, dest_str.c_str()); // parse the address string into an entity address (bind_addr)
- 创建simplemessenger,并进行magic和policy设置
// NOTE(review): the full listing earlier in this article reads the same option
// as g_conf->ms_type; the two spellings presumably come from different Ceph
// versions — confirm which one matches the version being analysed.
messenger = Messenger::create(g_ceph_context, g_conf().get_val<std::string>("ms_type"), // the type decides between the simple and the async implementation
entity_name_t::MON(-1),
"simple_server",
0 /* nonce */,
0 /* flags */);
// enable timing prints
messenger->set_magic(MSG_MAGIC_TRACE_CTR);
messenger->set_default_policy(Messenger::Policy::stateless_server(0));
create()
该函数根据传入的type确定具体返回的Messenger子类:Messenger是父类,type为simple时返回的是SimpleMessenger,其构造函数中调用了
init_local_connection(); // initialize the local connection; whenever there is a new connection, all dispatchers have to be told about it
// Fills in local_connection with this messenger's own addresses (my_addrs) and
// entity type (my_name.type()), enables every feature bit (CEPH_FEATURES_ALL)
// and then passes the connection to ms_deliver_handle_fast_connect().
void SimpleMessenger::init_local_connection()
{
  auto *const self_con = local_connection.get();

  self_con->peer_addrs = *my_addrs;
  self_con->peer_type = my_name.type();
  self_con->set_features(CEPH_FEATURES_ALL);

  // notify all of the dispatchers about the connection
  ms_deliver_handle_fast_connect(self_con);
}
- 为messenger来bind IP
// A negative r signals failure; main() jumps to its `out` label in that case.
r = messenger->bind(bind_addr);
messenger.h中定义了bind的接口,具体实现在SimpleMessenger中
// Binds the messenger to bind_addr by delegating to the accepter.
//
// Returns -1 without binding when the messenger has already been started;
// otherwise returns whatever accepter.bind() returns and records a successful
// bind (return value >= 0) in did_bind.
int SimpleMessenger::bind(const entity_addr_t &bind_addr)
{
  // `started` is sampled, and the matching log line is written, while `lock`
  // is held.
  lock.Lock();
  const bool already_started = started;
  if (already_started) {
    ldout(cct,10) << "rank.bind already started" << dendl;
  } else {
    ldout(cct,10) << "rank.bind " << bind_addr << dendl;
  }
  lock.Unlock();

  if (already_started) {
    return -1;
  }

  // bind to a socket: the address and port are actually bound on the accepter
  set<int> avoid_ports;
  const int rc = accepter.bind(bind_addr, avoid_ports);
  if (rc < 0) {
    return rc;
  }
  did_bind = true;
  return rc;
}
分析可知,最终完成bind的实际是Accepter。该类继承自thread类,用于在指定的地址和端口上监听即将到来的connection。Accepter封装了Linux socket的相关方法,包括创建socket、bind、listen以及创建pipe,是实际完成连接建立的模块
参考tcp通信流程
// Excerpt of Accepter::bind ("..." marks code that the article elided).
// Visible steps: create the listening socket, derive listen_addr from
// bind_addr, bind and listen on the socket, re-initialize the messenger's
// local connection and finally create the shutdown self-pipe.
int Accepter::bind(const entity_addr_t &bind_addr, const set<int>& avoid_ports)
{
...
listen_sd = socket_cloexec(family, SOCK_STREAM, 0); // create the socket; wraps the Linux socket creation call
...
...
// set up the address
entity_addr_t listen_addr = bind_addr;
if (listen_addr.get_type() == entity_addr_t::TYPE_NONE) {
listen_addr.set_type(entity_addr_t::TYPE_LEGACY);
}
listen_addr.set_family(family);
...
...
// a specific port was requested
if (listen_addr.get_port())
...
...
// do the bind; this is still the plain Linux socket bind()
rc = ::bind(listen_sd, listen_addr.get_sockaddr(), listen_addr.get_sockaddr_len());
...
...
// listen! (Linux call); this does not seem to start a thread, it only gets the socket ready to accept connections
rc = ::listen(listen_sd, msgr->cct->_conf->ms_tcp_listen_backlog);
...
...
// The local connection is initialized a second time here (the dispatchers are notified again).
// NOTE(review): the article originally explained this as "msgr is a different
// messenger owned by the accepter". That is not visible in this excerpt;
// presumably msgr points back to the SimpleMessenger that owns this Accepter
// and the call is repeated because the address changed after the bind —
// confirm against the Ceph source.
msgr->init_local_connection();
// create the pipe (shutdown_rd_fd / shutdown_wr_fd)
rc = create_selfpipe(&shutdown_rd_fd, &shutdown_wr_fd);
}
bind完成,在bind过程的最后一步创建了pipe
// Creates a pipe via pipe_cloexec(), switches both ends to non-blocking mode
// and hands the descriptors back to the caller.
//
// pipe_rd receives the read end (index 0), pipe_wr the write end (index 1).
// Returns 0 on success, or the negated errno when the pipe cannot be created.
// A failing fcntl() call trips ceph_assert.
int Accepter::create_selfpipe(int *pipe_rd, int *pipe_wr) {
  int fds[2];  // one descriptor for reading, one for writing
  if (pipe_cloexec(fds) < 0) {  // runs the Linux pipe creation call
    const int saved_errno = errno;
    lderr(msgr->cct) << __func__ << " unable to create the selfpipe: "
                     << cpp_strerror(saved_errno) << dendl;
    return -saved_errno;
  }

  // set the file status flags on each end of the pipe
  for (const int fd : fds) {
    const int flags = fcntl(fd, F_GETFL);
    ceph_assert(flags != -1);
    const int set_rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
    ceph_assert(set_rc != -1);
  }

  *pipe_rd = fds[0];
  *pipe_wr = fds[1];
  return 0;
}
- 初始化守护进程,貌似会开启log的线程和守护进程
// Set up crypto, daemonize, etc.
//global_init_daemonize(g_ceph_context, 0);
// NOTE(review): presumably the logging thread and the daemon are started here;
// it is unclear whether they are skipped when they already exist — confirm.
common_init_finish(g_ceph_context);
- 创建dispatcher并添加到messenger的dispatcher队列头
// Create the dispatcher for this messenger and register it with the messenger.
dispatcher = new SimpleDispatcher(messenger);
messenger->add_dispatcher_head(dispatcher); // should reach ready()
// Registers dispatcher `d` at the front of `dispatchers`, and also at the
// front of `fast_dispatchers` when d->ms_can_fast_dispatch_any() is true.
// If `dispatchers` was empty before this call, ready() is invoked afterwards.
void add_dispatcher_head(Dispatcher *d) {
  const bool was_empty = dispatchers.empty();

  dispatchers.push_front(d);
  if (d->ms_can_fast_dispatch_any()) {
    fast_dispatchers.push_front(d);
  }

  if (!was_empty) {
    return;
  }
  // ready() has to run before the rest of the program can proceed
  ready();
}
同样的,ready函数在simplemessenger中实现,继承自messenger中的接口,在ready函数中会开启多个线程
// SimpleMessenger's ready(): logs the legacy address, starts the dispatch
// queue and, when an earlier bind() succeeded (did_bind), starts the accepter
// while holding `lock`.
void SimpleMessenger::ready()
{
ldout(cct,10) << "ready " << get_myaddr_legacy() << dendl;
dispatch_queue.start(); // per the article's analysis this starts two threads, dispatch_thread and local_delivery_thread (not visible in this excerpt)
lock.Lock();
// the bind succeeded, so start the accepter thread
if (did_bind)
accepter.start();
lock.Unlock();
}
- 启动simplemessenger(对应main中的messenger->start()),其中会开启reaper线程
reaper_started = true; // start the reaper thread, which reclaims pipe resources after their communication has been closed
reaper_thread.create("ms_reaper");
- 开始工作,阻塞住main函数的线程,直到messenger shutdown
// Blocks the calling (main) thread until the messenger is shut down.
messenger->wait(); // can't be called until ready()
...
...
// Excerpt of SimpleMessenger::wait ("..." marks code that the article elided).
// Visible steps: wait on stop_cond unless already stopped, stop the accepter,
// shut down the dispatch queue, stop and join the reaper thread, then close
// and reap every remaining pipe.
void SimpleMessenger::wait()
{
...
if (!stopped)
stop_cond.Wait(lock); // blocks the main thread until the messenger is shut down
...
// Stop the threads that were started. The log and daemon threads do not show
// up here, so presumably they are not managed by the messenger —
// NOTE(review): confirm.
accepter.stop();
...
dispatch_queue.shutdown();
...
if (reaper_started) {
ldout(cct,20) << "wait: stopping reaper thread" << dendl;
// Both the signal and the reaper_stop flag are set while `lock` is held.
lock.Lock();
reaper_cond.Signal();
reaper_stop = true;
lock.Unlock();
reaper_thread.join();
reaper_started = false;
ldout(cct,20) << "wait: stopped reaper thread" << dendl;
}
...
// close+reap all pipes
lock.Lock();
{
ldout(cct,10) << "wait: closing pipes" << dendl;
while (!rank_pipe.empty()) {
Pipe *p = rank_pipe.begin()->second;
p->unregister_pipe();
p->pipe_lock.Lock();
p->stop_and_wait();
...
}
reaper(); // use reaper to delete the pipes
ldout(cct,10) << "wait: waiting for pipes " << pipes << " to close" << dendl;
// Keep reaping until `pipes` is empty, waiting on reaper_cond between rounds.
while (!pipes.empty()) {
reaper_cond.Wait(lock);
reaper();
}
}
...
}
用流程图总结上述代码,可表示为下图。此图从上述第五步的ready开始,描述了配置完成之后simple通信方式的工作流程