protobuf使用基础

一、protobuf简介

万能的google可以找到这个关于protobuf的简介,从实现说明上来看,并没有特别值得说明的地方。对于一个协议或者存储来说,最为关心的其实是协议或存储的兼容性问题,其它的int变长编码并没有什么特殊的,因为在这之前,utf-8之类的变长编码也同样使用类似的方法来进行编解码来节省流量。
看了说明之后,所谓的版本兼容需要的也只是为每个字段定义一个永久的唯一数字ID,每个字段的ID定义之后不能修改。在编码过程中,在每个字段(field)前加上一个tag,用于表示接下来字段的编码;对于读取方来说,同样是根据这个ID来确定接下来的字段如何解释,如果自己本地不识别这个ID,可以丢弃。但是这点同样也早就存在,在IP层和TCP层的options字段存储,使用的同样是这样类似的编码方式:

 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver= 4 |IHL= 8 |Type of Service|       Total Length = 576      |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|       Identification = 111    |Flg=0|     Fragment Offset = 0 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|   Time = 123  |  Protocol = 6 |       Header Checksum         |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                        source address                         |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                      destination address                      |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Code = x | Opt.  Len.= 3 | option value  | Opt. Code = x |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Len. = 4 |           option value        | Opt. Code = 1 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Code = y | Opt. Len. = 3 |  option value | Opt. Code = 0 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                             data                              |
\                                                               \
\                                                               \
|                             data                              |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                             data                              |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

Example Internet Datagram

google官方文档中对于该问题的说明

Extending a Protocol Buffer

Sooner or later after you release the code that uses your protocol buffer, you will undoubtedly want to "improve" the protocol buffer's definition. If you want your new buffers to be backwards-compatible, and your old buffers to be forward-compatible – and you almost certainly do want this – then there are some rules you need to follow. In the new version of the protocol buffer:

you must not change the tag numbers of any existing fields.
you must not add or delete any required fields.
you may delete optional or repeated fields.
you may add new optional or repeated fields but you must use fresh tag numbers (i.e. tag numbers that were never used in this protocol buffer, not even by deleted fields)

二、源码构建

由于这个工具大致看来的确没有什么可以深入挖掘的,所以还是先看下构建过程吧,按照工程的标准文档执行就行了。
tsecer@protobuf: sh autogen.sh
+ mkdir -p third_party/googletest/m4
+ autoreconf -f -i -Wall,no-obsolete
……
tsecer@protobuf: ./configure --prefix=/data1/harry/work/protobuf-master/protolib/ CXXFLAGS=-g
checking whether to enable maintainer-specific portions of Makefiles... yes
checking build system type... x86_64-unknown-linux-gnu
checking host system type... x86_64-unknown-linux-gnu
checking target system type... x86_64-unknown-linux-gnu
……
tsecer@protobuf: make
……
tsecer@protobuf: ls ./src/protoc
./src/protoc

tsecer@protobuf: g++ -std=c++11 *.cc -lprotobuf -g
tsecer@protobuf:
执行生成的可执行文件
tsecer@protobuf: LD_LIBRARY_PATH=/usr/local/lib/ ./a.out

三、使用描述生成代码

这里使用的其实是protobuf自带的例子,在protobuf-master\examples\addressbook.proto,protobuf-master\examples\add_person.cc
tsecer@protobuf: cat demo.proto
syntax = "proto2";

package tutorial;

// A single address-book entry. The number after each '=' is the field's
// permanent tag number: it is what gets written on the wire in front of the
// field, so it must never be changed once the message is in use.
message Person {
// Fields 1 and 2 are required; field 3 is optional.
required string name = 1;
required int32 id = 2;
optional string email = 3;

// Kind of phone number; used by PhoneNumber.type below.
enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}

// One phone number of the person; `type` falls back to HOME when not set.
message PhoneNumber {
required string number = 1;
optional PhoneType type = 2 [default = HOME];
}

// Zero or more phone numbers.
repeated PhoneNumber phones = 4;
}

// Top-level container: a list of Person entries.
message AddressBook {
repeated Person people = 1;
}

tsecer@protobuf: ../src/protoc -I. --cpp_out=. demo.proto
tsecer@protobuf: ls
demo.pb.cc demo.pb.h demo.proto
tsecer@protobuf:
生成的内容:

由于number是一个string,所以增加了一些字符串之类的操作接口
// required string number = 1;
bool has_number() const;
void clear_number();
static const int kNumberFieldNumber = 1;
const std::string& number() const;
void set_number(const std::string& value);
void set_number(std::string&& value);
void set_number(const char* value);
void set_number(const char* value, size_t size);
std::string* mutable_number();
std::string* release_number();
void set_allocated_number(std::string* number);
……
// required int32 id = 2;
bool has_id() const;
void clear_id();
static const int kIdFieldNumber = 2;
::PROTOBUF_NAMESPACE_ID::int32 id() const;
void set_id(::PROTOBUF_NAMESPACE_ID::int32 value);

四、序列化的支持

1、从文件中反序列化代码

// Generated parser for tutorial.Person: reads tag/value pairs from `input`
// and merges each recognised field into this object.
//
// Returns true when a tag of 0 is encountered (see handle_unusual), and false
// as soon as any read wrapped in DO_ reports failure.
//
// Each case checks the low byte of the tag against the expected key, which is
// (field_number << 3) | wire_type:
//   10 = (1 << 3) | 2   name,   length-delimited
//   16 = (2 << 3) | 0   id,     varint
//   26 = (3 << 3) | 2   email,  length-delimited
//   34 = (4 << 3) | 2   phones, length-delimited
// A field number that arrives with a different wire type is routed to
// handle_unusual instead of being parsed as that field.
bool Person::MergePartialFromCodedStream(
::PROTOBUF_NAMESPACE_ID::io::CodedInputStream* input) {
// DO_ evaluates EXPRESSION and jumps to the `failure` label when it is false.
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
::PROTOBUF_NAMESPACE_ID::uint32 tag;
// @@protoc_insertion_point(parse_start:tutorial.Person)
for (;;) {
// p.first is the tag that was read; when p.second is false the tag is handed
// to handle_unusual rather than to the switch below.
// NOTE(review): the exact meaning of the 127u cutoff is defined by
// CodedInputStream and is not visible here.
::std::pair<::PROTOBUF_NAMESPACE_ID::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
tag = p.first;
if (!p.second) goto handle_unusual;
// Dispatch on the field number extracted from the tag.
switch (::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::GetTagFieldNumber(tag)) {
// required string name = 1;
case 1: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (10 & 0xFF)) {
// Read the string into name, then run the UTF-8 check on what was read.
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_name()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.name");
} else {
goto handle_unusual;
}
break;
}

// required int32 id = 2;
case 2: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) {
// The presence bit is set before the value is read directly into id_.
HasBitSetters::set_has_id(this);
DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive<
::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>(
input, &id_)));
} else {
goto handle_unusual;
}
break;
}

// optional string email = 3;
case 3: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (26 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_email()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.email");
} else {
goto handle_unusual;
}
break;
}

// repeated .tutorial.Person.PhoneNumber phones = 4;
case 4: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (34 & 0xFF)) {
// Each occurrence appends a new PhoneNumber and parses the nested message
// into it.
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadMessage(
input, add_phones()));
} else {
goto handle_unusual;
}
break;
}

default: {
// Reached for unknown field numbers, for known field numbers with an
// unexpected wire type, and when ReadTagWithCutoffNoLastTag returned false.
handle_unusual:
// A zero tag ends parsing successfully.
if (tag == 0) {
goto success;
}
// Anything else is skipped, with the unknown-field set passed along so the
// data is not simply thrown away by this function.
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SkipField(
input, tag, _internal_metadata_.mutable_unknown_fields()));
break;
}
}
}
success:
// @@protoc_insertion_point(parse_success:tutorial.Person)
return true;
failure:
// @@protoc_insertion_point(parse_failure:tutorial.Person)
return false;
#undef DO_
}

2、序列化代码支持

// Generated serializer for tutorial.Person: writes the fields that are present
// to `output` in field-number order (1, 2, 3, 4), followed by any unknown
// fields held in _internal_metadata_.
//
// Presence of the singular fields is taken from _has_bits_[0]; as tested
// below, name uses bit 0x1, email bit 0x2 and id bit 0x4.
void Person::SerializeWithCachedSizes(
::PROTOBUF_NAMESPACE_ID::io::CodedOutputStream* output) const {
// @@protoc_insertion_point(serialize_start:tutorial.Person)
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
// The cast to void marks the variable as used.
(void) cached_has_bits;

cached_has_bits = _has_bits_[0];
// required string name = 1;
if (cached_has_bits & 0x00000001u) {
// The UTF-8 check runs before the string is written.
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.name");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
1, this->name(), output);
}

// required int32 id = 2;
if (cached_has_bits & 0x00000004u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->id(), output);
}

// optional string email = 3;
if (cached_has_bits & 0x00000002u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.email");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
3, this->email(), output);
}

// repeated .tutorial.Person.PhoneNumber phones = 4;
// Every element is written as its own field-4 entry, in index order.
for (unsigned int i = 0,
n = static_cast<unsigned int>(this->phones_size()); i < n; i++) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteMessageMaybeToArray(
4,
this->phones(static_cast<int>(i)),
output);
}

// Unknown fields, if any, are written last.
if (_internal_metadata_.have_unknown_fields()) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SerializeUnknownFields(
_internal_metadata_.unknown_fields(), output);
}
// @@protoc_insertion_point(serialize_end:tutorial.Person)
}

五、反射支持

1、生成内容

// Description of demo.proto emitted by protoc as a C string literal. The bytes
// are protobuf wire format (a message described by descriptor.proto), with
// non-printable bytes written as C escapes; the readable fragments are simply
// bytes that happen to be printable.
//
// Decoding the beginning, with key = (field_number << 3) | wire_type:
//   "\n"          0x0A -> field 1, wire type 2 (length-delimited)
//   "\n"          0x0A -> length 10
//   "demo.proto"  the 10-byte value of field 1
//   "\022"        0x12 -> field 2, wire type 2
//   "\010"        0x08 -> length 8
//   "tutorial"    the 8-byte value of field 2
//   "\""          0x22 -> field 4, wire type 2
//   "\333\001"    varint length 219 of the nested message that follows
const char descriptor_table_protodef_demo_2eproto[] =
"\n\ndemo.proto\022\010tutorial\"\333\001\n\006Person\022\014\n\004nam"
"e\030\001 \002(\t\022\n\n\002id\030\002 \002(\005\022\r\n\005email\030\003 \001(\t\022,\n\006ph"
"ones\030\004 \003(\0132\034.tutorial.Person.PhoneNumber"
"\032M\n\013PhoneNumber\022\016\n\006number\030\001 \002(\t\022.\n\004type\030"
"\002 \001(\0162\032.tutorial.Person.PhoneType:\004HOME\""
"+\n\tPhoneType\022\n\n\006MOBILE\020\000\022\010\n\004HOME\020\001\022\010\n\004WO"
"RK\020\002\"/\n\013AddressBook\022 \n\006people\030\001 \003(\0132\020.tu"
"torial.Person"
;

2、该文件格式的描述文件

对于每个文件的生成的protodef格式,同样是使用proto文件描述,描述文件位于protobuf-master\src\google\protobuf\descriptor.proto,由于内容比较多,所以只摘录一些基本内容:
// Describes a complete .proto file.
//
// The field numbers below are what appear as keys in a serialized descriptor,
// where key = (field_number << 3) | wire_type. For the length-delimited
// fields this gives, for example:
//   name         = 1 -> key 0x0A, the character '\n'
//   package      = 2 -> key 0x12, written "\022"
//   message_type = 4 -> key 0x22, the character '"'
message FileDescriptorProto {
optional string name = 1; // file name, relative to root of source tree
optional string package = 2; // e.g. "foo", "foo.bar", etc.

// Names of files imported by this file.
repeated string dependency = 3;
// Indexes of the public imported files in the dependency list above.
repeated int32 public_dependency = 10;
// Indexes of the weak imported files in the dependency list.
// For Google-internal migration only. Do not use.
repeated int32 weak_dependency = 11;

// All top-level definitions in this file.
repeated DescriptorProto message_type = 4;
repeated EnumDescriptorProto enum_type = 5;
repeated ServiceDescriptorProto service = 6;
repeated FieldDescriptorProto extension = 7;

optional FileOptions options = 8;

// This field contains optional information about the original source code.
// You may safely remove this entire field without harming runtime
// functionality of the descriptors -- the information is needed only by
// development tools.
optional SourceCodeInfo source_code_info = 9;

// The syntax of the proto file.
// The supported values are "proto2" and "proto3".
optional string syntax = 12;
}

3、为什么描述文件中有换行、制表符、左括号这种文本内容


刚看到这个格式的时候我困惑了很久,感觉这个是一个为了让显示更加友好的文本格式,但是后来才发现,这只是碰巧包含一些可显示字符而已,本质上还是按照protobuf的编码格式。例如前面的描述文件,开头两个换行符(\n,即0x0A)对应的二进制都是1010,其中最低三个bit(010,即2)为wire type,其余高位(1)为字段编号

https://developers.google.com/protocol-buffers/docs/encoding
The available wire types are as follows:

Type Meaning Used For
0 Varint int32, int64, uint32, uint64, sint32, sint64, bool, enum
1 64-bit fixed64, sfixed64, double
2 Length-delimited string, bytes, embedded messages, packed repeated fields
3 Start group groups (deprecated)
4 End group groups (deprecated)
5 32-bit fixed32, sfixed32, float
Each key in the streamed message is a varint with the value (field_number << 3) | wire_type – in other words, the last three bits of the number store the wire type.
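所以开始的第一个\n是tag,表示fieldnum为1(即FileDescriptorProto的name字段),wire type为2(Length-delimited),也就是后面紧跟一个varint编码的长度,再接着是该长度的内容;接下来的第二个\n(二进制1010,即十进制10)就是这个长度,表示字符串的长度为10,也就是字符串"demo.proto"的长度。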

4、protoc如何生成这些字符串


对每个字符的转义处理:protobuf-master\src\google\protobuf\stubs\strutil.cc
// ----------------------------------------------------------------------
// Escapes 'src' using C-style escape sequences, and appends the escaped string
// to 'dest'. This version is faster than calling CEscapeInternal as it computes
// the required space using a lookup table, and also does not do any special
// handling for Hex or UTF-8 characters.
//
// Escaping rules, as implemented below:
//   - '\n', '\r', '\t', '"', '\'' and '\\' become a backslash followed by one
//     character;
//   - any other byte for which isprint() is false becomes a backslash followed
//     by exactly three octal digits;
//   - every other byte is copied unchanged.
//
// 'dest' is grown once up front by CEscapedLength(src) and then filled in
// place, so CEscapedLength must agree with the rules above.
// ----------------------------------------------------------------------
void CEscapeAndAppend(StringPiece src, string* dest) {
  size_t escaped_len = CEscapedLength(src);
  // Same length means nothing needs escaping: append the bytes verbatim.
  if (escaped_len == src.size()) {
    dest->append(src.data(), src.size());
    return;
  }

  size_t cur_dest_len = dest->size();
  dest->resize(cur_dest_len + escaped_len);
  char* append_ptr = &(*dest)[cur_dest_len];

  // The index is size_t so that it matches the type of src.size(); a signed
  // int index would be compared against an unsigned value and could overflow
  // on very large inputs.
  for (size_t i = 0; i < src.size(); ++i) {
    // Work on the unsigned value so that bytes >= 0x80 are safe to pass to
    // isprint() and produce the right octal digits.
    unsigned char c = static_cast<unsigned char>(src[i]);
    switch (c) {
      case '\n': *append_ptr++ = '\\'; *append_ptr++ = 'n'; break;
      case '\r': *append_ptr++ = '\\'; *append_ptr++ = 'r'; break;
      case '\t': *append_ptr++ = '\\'; *append_ptr++ = 't'; break;
      case '\"': *append_ptr++ = '\\'; *append_ptr++ = '\"'; break;
      case '\'': *append_ptr++ = '\\'; *append_ptr++ = '\''; break;
      case '\\': *append_ptr++ = '\\'; *append_ptr++ = '\\'; break;
      default:
        if (!isprint(c)) {
          // Three-digit octal escape: c = d2 * 64 + d1 * 8 + d0.
          *append_ptr++ = '\\';
          *append_ptr++ = '0' + c / 64;
          *append_ptr++ = '0' + (c % 64) / 8;
          *append_ptr++ = '0' + c % 8;
        } else {
          *append_ptr++ = c;
        }
        break;
    }
  }
}

六、protobuf内部如何表示repeated内容

1、基础结构

简单来看,就是长度加上起始地址
protobuf-master\src\google\protobuf\repeated_field.h
int current_size_;
int total_size_;
struct Rep {
Arena* arena;
Element elements[1];
};
// We can not use sizeof(Rep) - sizeof(Element) due to the trailing padding on
// the struct. We can not use sizeof(Arena*) as well because there might be
// a "gap" after the field arena and before the field elements (e.g., when
// Element is double and pointer is 32bit).
static const size_t kRepHeaderSize;

// We reuse the Rep* for an Arena* when total_size == 0, to avoid having to do
// an allocation in the constructor when we have an Arena.
union Pointer {
Pointer(Arena* a) : arena(a) {}
Arena* arena; // When total_size_ == 0.
Element* elements; // When total_size_ != 0, this is Rep->elements of Rep.
} ptr_;

// Returns the start of the element array stored in ptr_.
// Only meaningful while total_size_ > 0 (checked by the DCHECK below); when
// total_size_ == 0 the same union holds the Arena pointer instead.
Element* elements() const {
GOOGLE_DCHECK_GT(total_size_, 0);
return ptr_.elements;
}

2、如何扩容

相当于realloc一样,申请新的内存空间,然后整个数组进行移动。如此说来,保存一个对象的指针岂不是危险的?
// Appends a copy of `value` at the end of the field, growing the backing
// storage first when it is already full.
template <typename Element>
inline void RepeatedField<Element>::Add(const Element& value) {
  if (current_size_ == total_size_) {
    // `value` may refer to an element of this very array (for example
    // field.Add(field.Get(0))). Reserve() switches to newly allocated storage
    // and hands the old storage to InternalDeallocate, which would leave
    // `value` dangling, so take a copy before growing.
    const Element copy = value;
    Reserve(total_size_ + 1);
    elements()[current_size_++] = copy;
    return;
  }
  // Enough capacity: no reallocation happens, so `value` stays valid.
  elements()[current_size_++] = value;
}

// Ensures capacity for at least `new_size` elements; does nothing when the
// current capacity (total_size_) is already sufficient.
//
// Growth always moves to a freshly allocated Rep: the existing elements are
// transferred with MoveArray and the old Rep is passed to InternalDeallocate.
// Pointers or references into the old element array must therefore not be
// used after a call that actually grows the field.
//
// Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant
// amount of code bloat.
template <typename Element>
void RepeatedField<Element>::Reserve(int new_size) {
if (total_size_ >= new_size) return;
// Remember the current Rep (if any) so it can be released at the end.
Rep* old_rep = total_size_ > 0 ? rep() : NULL;
Rep* new_rep;
Arena* arena = GetArenaNoVirtual();
// New capacity: at least the configured minimum, at least double the current
// capacity, and at least what the caller asked for.
new_size = std::max(internal::kMinRepeatedFieldAllocationSize,
std::max(total_size_ * 2, new_size));
GOOGLE_DCHECK_LE(
static_cast<size_t>(new_size),
(std::numeric_limits<size_t>::max() - kRepHeaderSize) / sizeof(Element))
<< "Requested size is too large to fit into size_t.";
// Header (arena pointer plus any padding) followed by new_size elements.
size_t bytes = kRepHeaderSize + sizeof(Element) * static_cast<size_t>(new_size);
// Raw storage comes from the global operator new when there is no arena,
// otherwise from the arena as a char array.
if (arena == NULL) {
new_rep = static_cast<Rep*>(::operator new(bytes));
} else {
new_rep = reinterpret_cast<Rep*>(Arena::CreateArray<char>(arena, bytes));
}
new_rep->arena = arena;
int old_total_size = total_size_;
total_size_ = new_size;
// From here on elements() refers to the new storage.
ptr_.elements = new_rep->elements;
// Invoke placement-new on newly allocated elements. We shouldn't have to do
// this, since Element is supposed to be POD, but a previous version of this
// code allocated storage with "new Element[size]" and some code uses
// RepeatedField with non-POD types, relying on constructor invocation. If
// Element has a trivial constructor (e.g., int32), gcc (tested with -O2)
// completely removes this loop because the loop body is empty, so this has no
// effect unless its side-effects are required for correctness.
// Note that we do this before MoveArray() below because Element's copy
// assignment implementation will want an initialized instance first.
Element* e = &elements()[0];
Element* limit = e + total_size_;
for (; e < limit; e++) {
new (e) Element;
}
// current_size_ > 0 implies there was an old Rep, so old_rep is non-null here.
if (current_size_ > 0) {
MoveArray(&elements()[0], old_rep->elements, current_size_);
}

// Likewise, we need to invoke destructors on the old array.
InternalDeallocate(old_rep, old_total_size);

}

七、如何在项目中使用protobuf

1、message定义

可以参考paxos中的使用方法phxpaxos-master\src\algorithm\instance.cpp:
// Header carried in front of every message body. Instance::OnReceive reads
// cmdid from it to decide which message type the body should be parsed as.
message Header
{
required uint64 gid = 1;
required uint64 rid = 2;
required int32 cmdid = 3;
// Optional, so a header without this field still parses.
optional int32 version = 4;
};

// Body of a paxos message. Only MsgType is required; every other field is
// optional, so fields can be left unset without breaking parsing.
// NodeID is the value Instance::OnReceive passes (via nodeid()) to its header
// check.
message PaxosMsg
{
required int32 MsgType = 1;
optional uint64 InstanceID = 2;
optional uint64 NodeID = 3;
optional uint64 ProposalID = 4;
optional uint64 ProposalNodeID = 5;
optional bytes Value = 6;
optional uint64 PreAcceptID = 7;
optional uint64 PreAcceptNodeID = 8;
optional uint64 RejectByPromiseID = 9;
optional uint64 NowInstanceID = 10;
optional uint64 MinChosenInstanceID = 11;
optional uint32 LastChecksum = 12;
optional uint32 Flag = 13;
optional bytes SystemVariables = 14;
optional bytes MasterVariables = 15;
};

2、代码中使用消息

// Handles one raw buffer received from a peer.
//
// The buffer is unpacked into a Header plus the position and length of the
// body. The header's cmdid then selects how the body is parsed:
//   - MsgCmd_PaxosMsg      -> PaxosMsg, passed to OnReceivePaxosMsg
//   - MsgCmd_CheckpointMsg -> CheckpointMsg, passed to OnReceiveCheckpointMsg
// Any other cmdid is ignored. A buffer that is too short, fails to unpack,
// fails to parse, or fails the header check is dropped and the function
// returns without dispatching.
void Instance :: OnReceive(const std::string & sBuffer)
{
    BP->GetInstanceBP()->OnReceive();

    // NOTE(review): 6 is presumably the minimum framing size expected by
    // Base::UnPackBaseMsg -- confirm against that function.
    if (sBuffer.size() <= 6)
    {
        PLGErr("buffer size %zu too short", sBuffer.size());
        return;
    }

    Header oHeader;
    size_t iBodyStartPos = 0;
    size_t iBodyLen = 0;
    int ret = Base::UnPackBaseMsg(sBuffer, oHeader, iBodyStartPos, iBodyLen);
    if (ret != 0)
    {
        return;
    }

    int iCmd = oHeader.cmdid();

    if (iCmd == MsgCmd_PaxosMsg)
    {
        // Paxos messages are not processed while asking for a checkpoint.
        if (m_oCheckpointMgr.InAskforcheckpointMode())
        {
            PLGImp("in ask for checkpoint mode, ignord paxosmsg");
            return;
        }

        PaxosMsg oPaxosMsg;
        bool bSucc = oPaxosMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
        if (!bSucc)
        {
            BP->GetInstanceBP()->OnReceiveParseError();
            PLGErr("PaxosMsg.ParseFromArray fail, skip this msg");
            return;
        }

        if (!ReceiveMsgHeaderCheck(oHeader, oPaxosMsg.nodeid()))
        {
            return;
        }

        OnReceivePaxosMsg(oPaxosMsg);
    }
    else if (iCmd == MsgCmd_CheckpointMsg)
    {
        CheckpointMsg oCheckpointMsg;
        bool bSucc = oCheckpointMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
        if (!bSucc)
        {
            BP->GetInstanceBP()->OnReceiveParseError();
            // Name the message type that actually failed, so this log line
            // can be told apart from the PaxosMsg branch above.
            PLGErr("CheckpointMsg.ParseFromArray fail, skip this msg");
            return;
        }

        if (!ReceiveMsgHeaderCheck(oHeader, oCheckpointMsg.nodeid()))
        {
            return;
        }

        OnReceiveCheckpointMsg(oCheckpointMsg);
    }
}

八、在protobuf中自定义方法

简单来说,就是不推荐、不允许
https://stackoverflow.com/questions/3897229/extending-protobuf-with-my-own-methods

 

posted on 2019-03-26 14:46  tsecer  阅读(1885)  评论(0)  编辑  收藏  举报

导航