protobuf使用基础

时间 2019-12-07

原文原文链接

1、protobuf简介

通过万能的google可以找到这个关于protobuf的简介，从实现说明上来看，并无特别值得说明的地方。对于一个协议或者存储来说，最为关心的实际上是协议或存储的兼容性问题，其它的int变长编码并没有什么特殊的，因为在这之前，utf-8之类的变长编码也同样使用类似的方法进行编解码以节省流量。
看了说明之后，所谓的版本兼容需要的也只是为每个字段定义一个永久的唯一数字ID，每个字段的ID定义之后不能修改。在编码过程中，在每个字段(field)前加上一个tag，用于标识接下来字段的编码；对于读取方来说，同样是根据这个ID来确定接下来的字段如何解释，如果自己本地不识别这个ID，可以丢弃。但是这点同样也早就存在，在IP层和TCP层的options字段存储中，使用的同样是这种类似的编码方式：

Example Internet Datagram

google官方文档中对于该问题的说明

Extending a Protocol Buffer

Sooner or later after you release the code that uses your protocol buffer, you will undoubtedly want to "improve" the protocol buffer's definition. If you want your new buffers to be backwards-compatible, and your old buffers to be forward-compatible – and you almost certainly do want this – then there are some rules you need to follow. In the new version of the protocol buffer:

you must not change the tag numbers of any existing fields.
you must not add or delete any required fields.
you may delete optional or repeated fields.
you may add new optional or repeated fields but you must use fresh tag numbers (i.e. tag numbers that were never used in this protocol buffer, not even by deleted fields).

2、源码构建

由于这个工具大体看来的确没有什么可以深入挖掘的，所以还是先看下构建过程吧，按照工程的标准文档执行就可以了。
tsecer@protobuf: sh autogen.sh
+ mkdir -p third_party/googletest/m4
+ autoreconf -f -i -Wall,no-obsolete
……
tsecer@protobuf: ./configure --prefix=/data1/harry/work/protobuf-master/protolib/ CXXFLAGS=-g
checking whether to enable maintainer-specific portions of Makefiles... yes
checking build system type... x86_64-unknown-linux-gnu
checking host system type... x86_64-unknown-linux-gnu
checking target system type... x86_64-unknown-linux-gnu
……
tsecer@protobuf: make
……
tsecer@protobuf: ls ./src/protoc
./src/protoc

tsecer@protobuf: g++ -std=c++11 *.cc -lprotobuf -g
tsecer@protobuf:
执行生成的可执行文件
LD_LIBRARY_PATH=/usr/local/lib/ ./a.out

3、使用描述生成代码

这里使用的实际上是protobuf自带的例子，在protobuf-master\examples\addressbook.proto，protobuf-master\examples\add_person.cc
tsecer@protobuf: cat demo.proto
syntax = "proto2";

package tutorial;

message Person {
required string name = 1;
required int32 id = 2;
optional string email = 3;

enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}

message PhoneNumber {
required string number = 1;
optional PhoneType type = 2 [default = HOME];
}

repeated PhoneNumber phones = 4;
}

message AddressBook {
repeated Person people = 1;
}

tsecer@protobuf: ../src/protoc -I. --cpp_out=. demo.proto
tsecer@protobuf: ls
demo.pb.cc demo.pb.h demo.proto
tsecer@protobuf:
生成的内容：

由于number是一个string，所以增加了一些字符串相关的操作接口
// required string number = 1;
bool has_number() const;
void clear_number();
static const int kNumberFieldNumber = 1;
const std::string& number() const;
void set_number(const std::string& value);
void set_number(std::string&& value);
void set_number(const char* value);
void set_number(const char* value, size_t size);
std::string* mutable_number();
std::string* release_number();
void set_allocated_number(std::string* number);
……
// required int32 id = 2;
bool has_id() const;
void clear_id();
static const int kIdFieldNumber = 2;
::PROTOBUF_NAMESPACE_ID::int32 id() const;
void set_id(::PROTOBUF_NAMESPACE_ID::int32 value);

4、序列化的支持

一、从文件中反序列化代码

// Reads tagged fields from `input` in a loop and merges them into this Person.
// Returns true once a tag of 0 is read, and false as soon as any read wrapped
// in DO_ fails.
bool Person::MergePartialFromCodedStream(
::PROTOBUF_NAMESPACE_ID::io::CodedInputStream* input) {
// DO_ jumps to the `failure` label when EXPRESSION evaluates to false.
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
::PROTOBUF_NAMESPACE_ID::uint32 tag;
// @@protoc_insertion_point(parse_start:tutorial.Person)
for (;;) {
// p.first is the tag that was read; when p.second is false the tag is routed
// to the generic handle_unusual path instead of the switch below.
// NOTE(review): presumably p.second reports whether the tag passed the 127u
// cutoff check -- confirm against the CodedInputStream documentation.
::std::pair<::PROTOBUF_NAMESPACE_ID::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
tag = p.first;
if (!p.second) goto handle_unusual;
// Dispatch on the field number; each case then checks the low byte of the tag
// against the expected (field_number << 3) | wire_type value.
switch (::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::GetTagFieldNumber(tag)) {
// required string name = 1;
case 1: {
// 10 == (1 << 3) | 2: field 1 with wire type 2 (length-delimited).
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (10 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_name()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.name");
} else {
goto handle_unusual;
}
break;
}

// required int32 id = 2;
case 2: {
// 16 == (2 << 3) | 0: field 2 with wire type 0 (varint).
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) {
// The has-bit is set before the value is read into id_.
HasBitSetters::set_has_id(this);
DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive<
::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>(
input, &id_)));
} else {
goto handle_unusual;
}
break;
}

// optional string email = 3;
case 3: {
// 26 == (3 << 3) | 2: field 3 with wire type 2 (length-delimited).
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (26 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_email()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.email");
} else {
goto handle_unusual;
}
break;
}

// repeated .tutorial.Person.PhoneNumber phones = 4;
case 4: {
// 34 == (4 << 3) | 2: field 4 with wire type 2 (length-delimited).
// Each occurrence is parsed into a newly added phones element.
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (34 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadMessage(
input, add_phones()));
} else {
goto handle_unusual;
}
break;
}

default: {
// Shared path for unknown field numbers, tags whose low byte does not match
// the expected value, and tags rejected right after being read.
handle_unusual:
// A tag of 0 ends parsing successfully.
if (tag == 0) {
goto success;
}
// Anything else is handed to SkipField together with this message's
// unknown-field set.
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SkipField(
input, tag, _internal_metadata_.mutable_unknown_fields()));
break;
}
}
}
success:
// @@protoc_insertion_point(parse_success:tutorial.Person)
return true;
failure:
// @@protoc_insertion_point(parse_failure:tutorial.Person)
return false;
#undef DO_
}

二、序列化代码支持

// Writes this Person to `output` in field-number order: name (1), id (2),
// email (3), every phones element (4), and finally any unknown fields held in
// _internal_metadata_. The three singular fields are written only when their
// has-bit is set.
void Person::SerializeWithCachedSizes(
::PROTOBUF_NAMESPACE_ID::io::CodedOutputStream* output) const {
// @@protoc_insertion_point(serialize_start:tutorial.Person)
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
(void) cached_has_bits;

// Snapshot of the presence bits. As tested below, the masks are 0x1 for name,
// 0x2 for email and 0x4 for id, so they do not follow field-number order.
cached_has_bits = _has_bits_[0];
// required string name = 1;
if (cached_has_bits & 0x00000001u) {
// The UTF-8 verification call is made before the string is written.
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.name");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
1, this->name(), output);
}

// required int32 id = 2;
if (cached_has_bits & 0x00000004u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->id(), output);
}

// optional string email = 3;
if (cached_has_bits & 0x00000002u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.email");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
3, this->email(), output);
}

// repeated .tutorial.Person.PhoneNumber phones = 4;
// Repeated fields have no has-bit check here: one field-4 record is written
// per element, in index order.
for (unsigned int i = 0,
n = static_cast<unsigned int>(this->phones_size()); i < n; i++) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteMessageMaybeToArray(
4,
this->phones(static_cast<int>(i)),
output);
}

// Unknown fields stored in _internal_metadata_ are written last.
if (_internal_metadata_.have_unknown_fields()) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SerializeUnknownFields(
_internal_metadata_.unknown_fields(), output);
}
// @@protoc_insertion_point(serialize_end:tutorial.Person)
}

5、反射支持

一、生成内容

// Byte string emitted by protoc for demo.proto, with non-printable bytes
// written as C octal/character escapes. Presumably this is a serialized
// FileDescriptorProto (NOTE(review): confirm against descriptor.proto).
// The leading "\n\ndemo.proto" reads as: tag byte 0x0A ((1 << 3) | 2, i.e.
// field 1, length-delimited), length byte 0x0A (10), then the 10-character
// file name. The '\n', '\t', '"' and similar characters that follow are
// likewise tag/length bytes that happen to have a character escape.
const char descriptor_table_protodef_demo_2eproto[] =
"\n\ndemo.proto\022\010tutorial\"\333\001\n\006Person\022\014\n\004nam"
"e\030\001 \002(\t\022\n\n\002id\030\002 \002(\005\022\r\n\005email\030\003 \001(\t\022,\n\006ph"
"ones\030\004 \003(\0132\034.tutorial.Person.PhoneNumber"
"\032M\n\013PhoneNumber\022\016\n\006number\030\001 \002(\t\022.\n\004type\030"
"\002 \001(\0162\032.tutorial.Person.PhoneType:\004HOME\""
"+\n\tPhoneType\022\n\n\006MOBILE\020\000\022\010\n\004HOME\020\001\022\010\n\004WO"
"RK\020\002\"/\n\013AddressBook\022 \n\006people\030\001 \003(\0132\020.tu"
"torial.Person"
;

二、该文件格式的描述文件

对于每个文件生成的protodef格式，同样是使用proto文件来描述，描述文件位于protobuf-master\src\google\protobuf\descriptor.proto，由于内容比较多，所以只摘录一些基本内容：
// Describes a complete .proto file.
message FileDescriptorProto {
optional string name = 1; // file name, relative to root of source tree
optional string package = 2; // e.g. "foo", "foo.bar", etc.

// Names of files imported by this file.
repeated string dependency = 3;
// Indexes of the public imported files in the dependency list above.
repeated int32 public_dependency = 10;
// Indexes of the weak imported files in the dependency list.
// For Google-internal migration only. Do not use.
repeated int32 weak_dependency = 11;

// All top-level definitions in this file.
repeated DescriptorProto message_type = 4;
repeated EnumDescriptorProto enum_type = 5;
repeated ServiceDescriptorProto service = 6;
repeated FieldDescriptorProto extension = 7;

optional FileOptions options = 8;

// This field contains optional information about the original source code.
// You may safely remove this entire field without harming runtime
// functionality of the descriptors -- the information is needed only by
// development tools.
optional SourceCodeInfo source_code_info = 9;

// The syntax of the proto file.
// The supported values are "proto2" and "proto3".
optional string syntax = 12;
}

三、为何描述文件中有换行、制表符、左括号这种文本内容

刚看到这个格式的时候我困惑了好久，感觉这是一个为了让显示更加友好的文本格式，但是后来才发现，这只是碰巧有一些特殊的可显示字符而已，本质上还是按照protobuf的编码格式。例如前面的描述文件，开头的两个换行符（\n，即0x0A）对应的二进制均为1010，最低三个bit为类型：

https://developers.google.com/protocol-buffers/docs/encoding
The available wire types are as follows:

Type Meaning Used For
0 Varint int32, int64, uint32, uint64, sint32, sint64, bool, enum
1 64-bit fixed64, sfixed64, double
2 Length-delimited string, bytes, embedded messages, packed repeated fields
3 Start group groups (deprecated)
4 End group groups (deprecated)
5 32-bit fixed32, sfixed32, float
Each key in the streamed message is a varint with the value (field_number << 3) | wire_type – in other words, the last three bits of the number store the wire type.
因此开头的第一个\n（二进制1010）表示fieldnum为1（1010右移3位），类型为2（最低三位010），也就是后面跟着一个带长度前缀(length-delimited)的字段；接下来的第二个\n（同样是1010）表示字符串的长度为10，也就是字符串"demo.proto"的长度。

四、protoc如何生成这些字符串

对每一个字符的转义处理：protobuf-master\src\google\protobuf\stubs\strutil.cc
// ----------------------------------------------------------------------
// Escapes 'src' using C-style escape sequences, and appends the escaped string
// to 'dest'. This version is faster than calling CEscapeInternal as it computes
// the required space using a lookup table, and also does not do any special
// handling for Hex or UTF-8 characters.
// ----------------------------------------------------------------------
// Appends a C-escaped rendering of 'src' to '*dest'.
//
// The number of output bytes is obtained up front from CEscapedLength(); when
// it equals src.size() the input is appended unchanged. Otherwise '*dest' is
// grown once and the escaped bytes are written straight into the new tail:
//   - newline, carriage return, tab, double quote, single quote and backslash
//     become a two-character backslash escape;
//   - any other byte for which isprint() is false becomes a backslash
//     followed by three octal digits;
//   - everything else is copied as is.
void CEscapeAndAppend(StringPiece src, string* dest) {
  const size_t needed = CEscapedLength(src);
  if (needed == src.size()) {
    // Same length in and out: append the input verbatim.
    dest->append(src.data(), src.size());
    return;
  }

  const size_t old_len = dest->size();
  dest->resize(old_len + needed);
  char* out = &(*dest)[old_len];

  for (int pos = 0; pos < src.size(); ++pos) {
    const unsigned char ch = static_cast<unsigned char>(src[pos]);

    // Second character of a two-character escape, or 0 if 'ch' has none.
    char mnemonic = 0;
    switch (ch) {
      case '\n': mnemonic = 'n'; break;
      case '\r': mnemonic = 'r'; break;
      case '\t': mnemonic = 't'; break;
      case '\"': mnemonic = '\"'; break;
      case '\'': mnemonic = '\''; break;
      case '\\': mnemonic = '\\'; break;
      default: break;
    }

    if (mnemonic != 0) {
      *out++ = '\\';
      *out++ = mnemonic;
    } else if (isprint(ch)) {
      *out++ = ch;
    } else {
      // Three-digit octal escape: \ooo.
      *out++ = '\\';
      *out++ = '0' + ch / 64;
      *out++ = '0' + (ch % 64) / 8;
      *out++ = '0' + ch % 8;
    }
  }
}

6、protobuf内部如何表示repeated内容

一、基础结构

简单来看，就是长度加上起始地址
protobuf-master\src\google\protobuf\repeated_field.h
int current_size_;
int total_size_;
struct Rep {
Arena* arena;
Element elements[1];
};
// We can not use sizeof(Rep) - sizeof(Element) due to the trailing padding on
// the struct. We can not use sizeof(Arena*) as well because there might be
// a "gap" after the field arena and before the field elements (e.g., when
// Element is double and pointer is 32bit).
static const size_t kRepHeaderSize;

// We reuse the Rep* for an Arena* when total_size == 0, to avoid having to do
// an allocation in the constructor when we have an Arena.
union Pointer {
Pointer(Arena* a) : arena(a) {}
Arena* arena; // When total_size_ == 0.
Element* elements; // When total_size_ != 0, this is Rep->elements of Rep.
} ptr_;

// Returns ptr_.elements, the start of the element storage. Only meaningful
// once storage exists: the debug check requires total_size_ > 0, because
// ptr_ holds the Arena* instead while total_size_ == 0.
Element* elements() const {
GOOGLE_DCHECK_GT(total_size_, 0);
return ptr_.elements;
}

二、如何扩容

相当于realloc一样，申请新的内存空间，然后把整个数组移动过去。如此说来，保存一个元素对象的指针岂不是很危险？
// Appends a copy of `value` to the end of the field, growing the storage
// first when it is already full (current_size_ == total_size_).
template <typename Element>
inline void RepeatedField<Element>::Add(const Element& value) {
  if (current_size_ == total_size_) {
    // `value` may refer to an element stored in this very field. Reserve()
    // moves the elements into a new buffer and releases the old one, which
    // would leave `value` dangling, so copy it before growing.
    const Element copy = value;
    Reserve(total_size_ + 1);
    elements()[current_size_++] = copy;
    return;
  }
  elements()[current_size_++] = value;
}

// Ensures the field can hold at least `new_size` elements. Does nothing when
// total_size_ is already large enough; otherwise allocates a new Rep (from the
// arena when there is one, from ::operator new when not), default-constructs
// every slot, moves the current_size_ live elements across and releases the
// old Rep. Pointers or references into the old storage must not be used
// after a call that grows the field.
//
// Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant
// amount of code bloat.
template <typename Element>
void RepeatedField<Element>::Reserve(int new_size) {
if (total_size_ >= new_size) return;
// There is no old Rep to move from or free while total_size_ == 0.
Rep* old_rep = total_size_ > 0 ? rep() : NULL;
Rep* new_rep;
Arena* arena = GetArenaNoVirtual();
// Grow to the largest of: the minimum allocation size, double the current
// capacity, and the requested size.
// NOTE(review): total_size_ * 2 is an int multiplication and would overflow
// once total_size_ exceeds INT_MAX / 2.
new_size = std::max(internal::kMinRepeatedFieldAllocationSize,
std::max(total_size_ * 2, new_size));
GOOGLE_DCHECK_LE(
static_cast<size_t>(new_size),
(std::numeric_limits<size_t>::max() - kRepHeaderSize) / sizeof(Element))
<< "Requested size is too large to fit into size_t.";
// Header (arena pointer plus any padding) followed by new_size elements.
size_t bytes = kRepHeaderSize + sizeof(Element) * static_cast<size_t>(new_size);
if (arena == NULL) {
new_rep = static_cast<Rep*>(::operator new(bytes));
} else {
new_rep = reinterpret_cast<Rep*>(Arena::CreateArray<char>(arena, bytes));
}
new_rep->arena = arena;
// Remember the old capacity for the deallocation at the end, then switch the
// field over to the new storage so elements() refers to it from here on.
int old_total_size = total_size_;
total_size_ = new_size;
ptr_.elements = new_rep->elements;
// Invoke placement-new on newly allocated elements. We shouldn't have to do
// this, since Element is supposed to be POD, but a previous version of this
// code allocated storage with "new Element[size]" and some code uses
// RepeatedField with non-POD types, relying on constructor invocation. If
// Element has a trivial constructor (e.g., int32), gcc (tested with -O2)
// completely removes this loop because the loop body is empty, so this has no
// effect unless its side-effects are required for correctness.
// Note that we do this before MoveArray() below because Element's copy
// assignment implementation will want an initialized instance first.
Element* e = &elements()[0];
Element* limit = e + total_size_;
for (; e < limit; e++) {
new (e) Element;
}
// old_rep is only dereferenced when there are live elements, which implies
// the old capacity was non-zero and old_rep is therefore not NULL.
if (current_size_ > 0) {
MoveArray(&elements()[0], old_rep->elements, current_size_);
}

// Likewise, we need to invoke destructors on the old array.
InternalDeallocate(old_rep, old_total_size);

}

7、如何在项目中使用protobuf

一、message定义

可以参考paxos中的使用方法（phxpaxos-master\src\algorithm\instance.cpp）：
message Header
{
required uint64 gid = 1;
required uint64 rid = 2;
required int32 cmdid = 3;
optional int32 version = 4;
};

message PaxosMsg
{
required int32 MsgType = 1;
optional uint64 InstanceID = 2;
optional uint64 NodeID = 3;
optional uint64 ProposalID = 4;
optional uint64 ProposalNodeID = 5;
optional bytes Value = 6;
optional uint64 PreAcceptID = 7;
optional uint64 PreAcceptNodeID = 8;
optional uint64 RejectByPromiseID = 9;
optional uint64 NowInstanceID = 10;
optional uint64 MinChosenInstanceID = 11;
optional uint32 LastChecksum = 12;
optional uint32 Flag = 13;
optional bytes SystemVariables = 14;
optional bytes MasterVariables = 15;
};

二、代码中使用消息

/**
 * Handles one raw message buffer.
 *
 * Steps:
 *   1. Record the receive event and reject buffers of 6 bytes or fewer.
 *   2. Unpack the Header and locate the body with Base::UnPackBaseMsg.
 *   3. Dispatch on the header's cmdid:
 *      - MsgCmd_PaxosMsg: parse a PaxosMsg, unless the checkpoint manager is
 *        in ask-for-checkpoint mode, in which case the message is dropped;
 *      - MsgCmd_CheckpointMsg: parse a CheckpointMsg.
 *      Any other cmdid is ignored without logging.
 *   4. Pass the parsed message on only if ReceiveMsgHeaderCheck accepts the
 *      header together with the message's node id.
 *
 * Every failure path logs (where applicable) and returns; nothing is reported
 * back to the caller.
 *
 * @param sBuffer  raw bytes: packed header followed by the protobuf body.
 */
void Instance :: OnReceive(const std::string & sBuffer)
{
    BP->GetInstanceBP()->OnReceive();

    if (sBuffer.size() <= 6)
    {
        PLGErr("buffer size %zu too short", sBuffer.size());
        return;
    }

    Header oHeader;
    size_t iBodyStartPos = 0;
    size_t iBodyLen = 0;
    int ret = Base::UnPackBaseMsg(sBuffer, oHeader, iBodyStartPos, iBodyLen);
    if (ret != 0)
    {
        return;
    }

    int iCmd = oHeader.cmdid();

    if (iCmd == MsgCmd_PaxosMsg)
    {
        // Paxos traffic is dropped while a checkpoint is being requested.
        if (m_oCheckpointMgr.InAskforcheckpointMode())
        {
            PLGImp("in ask for checkpoint mode, ignord paxosmsg");
            return;
        }

        PaxosMsg oPaxosMsg;
        bool bSucc = oPaxosMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
        if (!bSucc)
        {
            BP->GetInstanceBP()->OnReceiveParseError();
            PLGErr("PaxosMsg.ParseFromArray fail, skip this msg");
            return;
        }

        if (!ReceiveMsgHeaderCheck(oHeader, oPaxosMsg.nodeid()))
        {
            return;
        }

        OnReceivePaxosMsg(oPaxosMsg);
    }
    else if (iCmd == MsgCmd_CheckpointMsg)
    {
        CheckpointMsg oCheckpointMsg;
        bool bSucc = oCheckpointMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
        if (!bSucc)
        {
            BP->GetInstanceBP()->OnReceiveParseError();
            // Name the message type that actually failed to parse, so this
            // log line can be told apart from the PaxosMsg branch.
            PLGErr("CheckpointMsg.ParseFromArray fail, skip this msg");
            return;
        }

        if (!ReceiveMsgHeaderCheck(oHeader, oCheckpointMsg.nodeid()))
        {
            return;
        }

        OnReceiveCheckpointMsg(oCheckpointMsg);
    }
}

8、在protobuf中自定义方法

简单来说，就是不推荐、不允许：https://stackoverflow.com/questions/3897229/extending-protobuf-with-my-own-methods