datastream解析
在EOS的eosiolib模块中有一个datastream.hpp文件,它几乎实现了所有类型到字节流的转换,是一个非常强大的工具类,在这里对它做一个简单的提取,也加强一下自己对它的理解。在下面的工程中有三个头文件,与EOS源码的对应关系如下:
#include"datastream.h" --> \eos\contracts\eosiolib\datastream.hpp //去掉了异常,改为代码处理
#include"serialize.h" --> \eos\contracts\eosiolib\serialize.hpp //未修改
#include"varint.h" --> \eos\contracts\eosiolib\varint.hpp //未修改
示例代码编译环境: ubuntu 16.04 boost 1.67 Qt Creator
下面我们从我的简单实例进行分析,相信理解了这部分原理,eos中的datastream自己也可以进行修改了。
先来看看我们需要实现的功能,main函数如下编写:
1 #include <iostream> 2 #include<vector> 3 #include<functional> 4 #include<algorithm> 5 #include <iterator> 6 #include<string> 7 #include<vector> 8 #include<set> 9 #include<map> 10 #include"datastream.h" 11 #include"serialize.h" 12 13 class CBase{ 14 public: 15 std::string m_strBase = "bright"; 16 std::vector<char> m_vcBase; 17 18 EOSLIB_SERIALIZE( CBase, (m_strBase)(m_vcBase) ) 19 }; 20 21 class CDerive:public CBase { 22 public: 23 std::string m_strDerive = "Derive"; 24 std::set<std::string> m_derSet; 25 26 EOSLIB_SERIALIZE_DERIVED( CDerive, CBase, (m_strDerive) (m_derSet) ) 27 }; 28 29 void printderive(const CDerive& derive) 30 { 31 std::cout << derive.m_strBase.data() << " " << derive.m_strDerive.data() << std::endl; 32 copy(derive.m_vcBase.begin(), derive.m_vcBase.end(), std::ostream_iterator<char>(std::cout, " ")); 33 std::cout << "\n"; 34 copy(derive.m_derSet.begin(), derive.m_derSet.end(), std::ostream_iterator<std::string>(std::cout, " ")); 35 } 36 37 int main() 38 { 39 CDerive derive; 40 derive.m_vcBase = { 'a', 'b' , 'c', 'd', 'e', 'f', 'g', 'h', 'i'}; 41 derive.m_derSet = {"one", "two", "three", "four", "five"}; 42 43 printderive(derive); 44 45 bytes packed_derive = pack(derive); 46 size_t size = packed_derive.size(); 47 int32_t data = 0; 48 49 std::cout << "\n"; 50 for(bytes::iterator iter = packed_derive.begin(); iter !=packed_derive.end(); ++iter) 51 { 52 data = *iter; 53 if(data <'a') 54 { 55 std::cout << data << " "; 56 } 57 else 58 { 59 std::cout << *iter << " "; 60 } 61 } 62 63 std::cout << "\n"; 64 65 CDerive copy_derive; 66 copy_derive = unpack<CDerive>(packed_derive); 67 printderive(copy_derive); 68 69 return 0; 70 }
运行后的打印信息如下
bright Derive
a b c d e f g h i
five four one three two
6 b r i g h t 9 a b c d e f g h i 6 68 e r i v e 5 4 f i v e 4 f o u r 3 o n e 5 t h r e e 3 t w o
bright Derive
a b c d e f g h i
five four one three two
从第四行的打印信息我们可以清楚地看到类对象的数据变成了一种格式:长度+内容。所有的数据从基类开始依次被放入到流中,并在前面加入了长度;而对于容器类型std::set<std::string>,首先会记录set的元素个数,再依次记录每个string的长度和内容。因此,在这里我们就可以联想到,任何一种数据我们都可以按照自己的想法去转化成datastream,如map,tuple,deque及自定义类型。那在上面的功能中最关键的为两个宏:EOSLIB_SERIALIZE与EOSLIB_SERIALIZE_DERIVED。它们是如何实现的呢?请看serialize.h文件的实现:
1 #ifndef SERIALIZE_H 2 #define SERIALIZE_H 3 #include<iostream> 4 #include<string> 5 #include"datastream.h" 6 #include <boost/preprocessor/seq/enum.hpp> 7 #include <boost/preprocessor/seq/size.hpp> 8 #include <boost/preprocessor/seq/seq.hpp> 9 #include<boost/preprocessor/seq/for_each.hpp> 10 11 12 #define EOSLIB_REFLECT_MEMBER_OP( r, OP, elem ) \ 13 OP t.elem 14 15 #define EOSLIB_SERIALIZE( TYPE, MEMBERS ) \ 16 template<typename DataStream> \ 17 friend DataStream& operator << ( DataStream& ds, const TYPE& t ){ \ 18 return ds BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, <<, MEMBERS );\ 19 }\ 20 template<typename DataStream> \ 21 friend DataStream& operator >> ( DataStream& ds, TYPE& t ){ \ 22 return ds BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, >>, MEMBERS );\ 23 } 24 25 #define EOSLIB_SERIALIZE_DERIVED( TYPE, BASE, MEMBERS ) \ 26 template<typename DataStream> \ 27 friend DataStream& operator << ( DataStream& ds, const TYPE& t ){ \ 28 ds << static_cast<const BASE&>(t); \ 29 return ds BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, <<, MEMBERS );\ 30 }\ 31 template<typename DataStream> \ 32 friend DataStream& operator >> ( DataStream& ds, TYPE& t ){ \ 33 ds >> static_cast<BASE&>(t); \ 34 return ds BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, >>, MEMBERS );\ 35 } 36 37 #endif // SERIALIZE_H
把main.cpp文件用上面的宏替换,可以看出其实就是在每个类中实现了自己的输入输出流。其中 BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, >>, MEMBERS ); 的意思是把对象的所需要的多个成员变量按宏依次展开。比如在CBase中的此句
ds BOOST_PP_SEQ_FOR_EACH( EOSLIB_REFLECT_MEMBER_OP, <<, MEMBERS );
会替换成:
ds << t.m_strBase << t.m_vcBase ;
因为返回值为datastream,故会循环调用直至无成员变量。另外,还有以下两个特点:
1.派生类的序列化运算符会先调用基类的序列化运算符,如此逐级向上,直到最顶层的基类(多重继承我没有测试过);
2.在ds << t.m_strBase << t.m_vcBase ; 中,类中的成员变量使用<<和 >>时是需要自己定义类型的转换的,即类中的成员变量和datastream是如何相互转换的,这也是我们接下来讨论的问题。
datastream的实现如下:
1 #ifndef DATASTREAM_H 2 #define DATASTREAM_H 3 4 #include<iostream> 5 #include<stdint.h> 6 #include<memory> 7 #include<cstring> 8 #include<vector> 9 #include<set> 10 #include<map> 11 #include"serialize.h" 12 #include"varint.h" 13 14 15 template<typename T> 16 class datastream { 17 public: 18 datastream(T start, size_t s) 19 :_start(start),_pos(start),_end(start + s) {} 20 21 inline void skip( size_t s) { _pos += s; } 22 inline bool read( char* d, size_t s){ 23 if( size_t(_end - _pos) < (size_t)s ) 24 { 25 return false; 26 } 27 28 memcpy(d, _pos, s); 29 _pos += s; 30 31 return true; 32 } 33 34 inline bool write(const char* d, size_t s){ 35 if(_end -_pos < (int32_t)s ) 36 { 37 return false; 38 } 39 memcpy((void*)_pos, d, s); 40 _pos += s; 41 42 return true; 43 } 44 45 inline bool put(char c) { 46 if(_pos >= _end) { 47 return false; 48 } 49 *_pos = c; 50 ++_pos; 51 return true; 52 } 53 54 inline bool get( unsigned char& c ) { return get( *(char*)&c ); } 55 56 inline bool get( char& c ) 57 { 58 if(_pos >= _end) 59 { 60 return false; 61 } 62 63 c = *_pos; 64 ++_pos; 65 return true; 66 } 67 68 T pos()const { return _pos; } 69 inline bool valid()const { return _pos <= _end && _pos >= _start; } 70 71 inline bool seekp(size_t p) { _pos = _start + p; return _pos <= _end; } 72 73 inline size_t tellp()const { return size_t(_pos - _start); } 74 75 inline size_t remaining()const { return _end - _pos; } 76 77 private: 78 T _start; 79 T _pos; 80 T _end; 81 }; 82 83 template<> 84 class datastream<size_t> { 85 public: 86 datastream( size_t init_size = 0):_size(init_size){} 87 inline bool skip( size_t s ) { _size += s; return true; } 88 inline bool write( const char* ,size_t s ) { _size += s; return true; } 89 inline bool put(char ) { ++_size; return true; } 90 inline bool valid()const { return true; } 91 inline bool seekp(size_t p) { _size = p; return true; } 92 inline size_t tellp()const { return _size; } 93 inline size_t remaining()const { return 0; } 94 private: 95 size_t _size; 
96 }; 97 98 typedef std::vector<char> bytes; 99 100 template<typename DataStream> 101 DataStream& operator << ( DataStream& ds, const std::string& v ) { 102 ds << unsigned_int( v.size() ); 103 if (v.size()) 104 ds.write(v.data(), v.size()); 105 return ds; 106 } 107 108 template<typename DataStream> 109 DataStream& operator >> ( DataStream& ds, std::string& v ) { 110 std::vector<char> tmp; 111 ds >> tmp; 112 if( tmp.size() ) 113 v = std::string(tmp.data(),tmp.data()+tmp.size()); 114 else 115 v = std::string(); 116 return ds; 117 } 118 119 template<typename DataStream, typename T> 120 DataStream& operator << ( DataStream& ds, const std::vector<T>& v ) { 121 ds << unsigned_int( v.size() ); 122 for( const auto& i : v ) 123 ds << i; 124 return ds; 125 } 126 127 template<typename DataStream> 128 DataStream& operator << ( DataStream& ds, const std::vector<char>& v ) { 129 ds << unsigned_int( v.size() ); 130 ds.write( v.data(), v.size() ); 131 return ds; 132 } 133 134 template<typename DataStream, typename T> 135 DataStream& operator >> ( DataStream& ds, std::vector<T>& v ) { 136 unsigned_int s; 137 ds >> s; 138 v.resize(s.value); 139 for( auto& i : v ) 140 ds >> i; 141 return ds; 142 } 143 144 template<typename DataStream> 145 DataStream& operator >> ( DataStream& ds, std::vector<char>& v ) { 146 unsigned_int s; 147 ds >> s; 148 v.resize( s.value ); 149 ds.read( v.data(), v.size() ); 150 return ds; 151 } 152 153 template<typename DataStream, typename T> 154 DataStream& operator << ( DataStream& ds, const std::set<T>& s ) { 155 ds << unsigned_int( s.size() ); 156 for( const auto& i : s ) { 157 ds << i; 158 } 159 return ds; 160 } 161 162 template<typename DataStream, typename T> 163 DataStream& operator >> ( DataStream& ds, std::set<T>& s ) { 164 s.clear(); 165 unsigned_int sz; ds >> sz; 166 167 for( uint32_t i = 0; i < sz.value; ++i ) { 168 T v; 169 ds >> v; 170 s.emplace( std::move(v) ); 171 } 172 return ds; 173 } 174 175 template<typename DataStream, typename K, typename 
V> 176 DataStream& operator << ( DataStream& ds, const std::map<K,V>& m ) { 177 ds << unsigned_int( m.size() ); 178 for( const auto& i : m ) { 179 ds << i.first << i.second; 180 } 181 return ds; 182 } 183 184 template<typename DataStream, typename K, typename V> 185 DataStream& operator >> ( DataStream& ds, std::map<K,V>& m ) { 186 m.clear(); 187 unsigned_int s; ds >> s; 188 189 for (uint32_t i = 0; i < s.value; ++i) { 190 K k; V v; 191 ds >> k >> v; 192 m.emplace( std::move(k), std::move(v) ); 193 } 194 return ds; 195 } 196 197 198 template<typename T> 199 size_t pack_size( const T& value ) { 200 datastream<size_t> ps; 201 ps << value; 202 return ps.tellp(); 203 } 204 205 template<typename T> 206 bytes pack( const T& value ) { 207 bytes result; 208 result.resize(pack_size(value)); 209 210 datastream<char*> ds( result.data(), result.size() ); 211 ds << value; 212 return result; 213 } 214 215 template<typename T> 216 T unpack( const char* buffer, size_t len ) { 217 T result; 218 datastream<const char*> ds(buffer,len); 219 ds >> result; 220 return result; 221 } 222 223 template<typename T> 224 T unpack( const std::vector<char>& bytes ) { 225 return unpack<T>( bytes.data(), bytes.size() ); 226 } 227 228 #endif // DATASTREAM_H
上面的datastream文件我只提取了极少的一部分,实际上EOS几乎已经实现了所有类型与datastream的相互转换。datastream有一个泛化版本和一个特化版本。泛化版本保存了缓冲区的起始、当前和结束三个位置,可以通过指针偏移访问缓冲区中的任何位置;特化版本datastream<size_t>不保存数据,只累加写入的字节数,pack_size用它来计算打包后的长度。而我们的CBase,CDerive的成员变量使用了string,vector,set类型,所以我们自己在这里增加了这三个类型与datastream数据流的转换函数(文件中也给出了map的转换函数作为示例);如果我们需要使用其他类型的成员,那么我们在这个文件里也要增加相应的转换函数,自定义类型也是如此。最后,还有一个依赖的头文件varint.h,内容如下:
#ifndef VARINT_H
#define VARINT_H

#include <stdint.h>

/**
 * 32-bit unsigned integer that is streamed as a variable-length quantity:
 * 7 value bits per byte, least-significant group first, with the high bit of
 * a byte set when another byte follows.
 */
struct unsigned_int {
    unsigned_int( uint32_t v = 0 ):value(v){}

    template<typename T>
    unsigned_int( T v ):value(v){}

    template<typename T>
    operator T()const { return static_cast<T>(value); }

    unsigned_int& operator=( uint32_t v ) { value = v; return *this; }

    uint32_t value;

    friend bool operator==( const unsigned_int& i, const uint32_t& v )     { return i.value == v; }
    friend bool operator==( const uint32_t& i, const unsigned_int& v )     { return i == v.value; }
    friend bool operator==( const unsigned_int& i, const unsigned_int& v ) { return i.value == v.value; }

    friend bool operator!=( const unsigned_int& i, const uint32_t& v )     { return i.value != v; }
    friend bool operator!=( const uint32_t& i, const unsigned_int& v )     { return i != v.value; }
    friend bool operator!=( const unsigned_int& i, const unsigned_int& v ) { return i.value != v.value; }

    friend bool operator<( const unsigned_int& i, const uint32_t& v )      { return i.value < v; }
    friend bool operator<( const uint32_t& i, const unsigned_int& v )      { return i < v.value; }
    friend bool operator<( const unsigned_int& i, const unsigned_int& v )  { return i.value < v.value; }

    friend bool operator>=( const unsigned_int& i, const uint32_t& v )     { return i.value >= v; }
    friend bool operator>=( const uint32_t& i, const unsigned_int& v )     { return i >= v.value; }
    friend bool operator>=( const unsigned_int& i, const unsigned_int& v ) { return i.value >= v.value; }

    /// Writes v as 1..5 bytes through ds.write().
    template<typename DataStream>
    friend DataStream& operator << ( DataStream& ds, const unsigned_int& v ){
        uint64_t val = v.value;
        do {
            uint8_t b = uint8_t(val) & 0x7f;
            val >>= 7;
            if( val > 0 )
                b |= 0x80;   // continuation bit: more groups follow
            ds.write( reinterpret_cast<const char*>(&b), 1 );
        } while( val );
        return ds;
    }

    /// Reads bytes through ds.get() until one without the continuation bit.
    /// A stream that ends early terminates the value with the groups read so
    /// far; groups beyond bit 31 are consumed but ignored.
    template<typename DataStream>
    friend DataStream& operator >> ( DataStream& ds, unsigned_int& vi ){
        uint32_t v = 0;
        unsigned shift = 0;
        char b = 0;
        do {
            // Reset before every read: if get() fails and leaves b untouched,
            // the continuation bit is clear and the loop stops instead of
            // spinning forever on the previous byte.
            b = 0;
            ds.get( b );
            // Shifting a 32-bit value by 32 or more is undefined behaviour.
            if( shift < 32 )
                v |= uint32_t( uint8_t(b) & 0x7f ) << shift;
            shift += 7;
        } while( uint8_t(b) & 0x80 );
        vi.value = v;
        return ds;
    }
};

/**
 * 32-bit signed integer that is streamed zigzag-encoded (0, -1, 1, -2, ...
 * map to 0, 1, 2, 3, ...) in the same variable-length byte format as
 * unsigned_int.
 */
struct signed_int {
    signed_int( int32_t v = 0 ):value(v){}
    operator int32_t()const { return value; }
    template<typename T>
    signed_int& operator=( const T& v ) { value = v; return *this; }
    signed_int operator++(int) { return value++; }
    signed_int& operator++(){ ++value; return *this; }

    int32_t value;

    friend bool operator==( const signed_int& i, const int32_t& v )    { return i.value == v; }
    friend bool operator==( const int32_t& i, const signed_int& v )    { return i == v.value; }
    friend bool operator==( const signed_int& i, const signed_int& v ) { return i.value == v.value; }

    friend bool operator!=( const signed_int& i, const int32_t& v )    { return i.value != v; }
    friend bool operator!=( const int32_t& i, const signed_int& v )    { return i != v.value; }
    friend bool operator!=( const signed_int& i, const signed_int& v ) { return i.value != v.value; }

    friend bool operator<( const signed_int& i, const int32_t& v )     { return i.value < v; }
    friend bool operator<( const int32_t& i, const signed_int& v )     { return i < v.value; }
    friend bool operator<( const signed_int& i, const signed_int& v )  { return i.value < v.value; }

    friend bool operator>=( const signed_int& i, const int32_t& v )    { return i.value >= v; }
    friend bool operator>=( const int32_t& i, const signed_int& v )    { return i >= v.value; }
    friend bool operator>=( const signed_int& i, const signed_int& v ) { return i.value >= v.value; }

    /// Writes the zigzag encoding of v as 1..5 bytes through ds.write().
    template<typename DataStream>
    friend DataStream& operator << ( DataStream& ds, const signed_int& v ){
        // Zigzag: (n << 1) ^ (n >> 31), computed on unsigned values so that no
        // negative number is shifted.
        const uint32_t sign_mask = v.value < 0 ? 0xFFFFFFFFu : 0u;
        uint32_t val = ( uint32_t(v.value) << 1 ) ^ sign_mask;
        do {
            uint8_t b = uint8_t(val) & 0x7f;
            val >>= 7;
            if( val > 0 )
                b |= 0x80;   // continuation bit: more groups follow
            ds.write( reinterpret_cast<const char*>(&b), 1 );
        } while( val );
        return ds;
    }

    /// Reads a variable-length value through ds.get() and undoes the zigzag
    /// mapping. Truncated or over-long input is handled as in unsigned_int.
    template<typename DataStream>
    friend DataStream& operator >> ( DataStream& ds, signed_int& vi ){
        uint32_t v = 0;
        unsigned shift = 0;
        char b = 0;
        do {
            b = 0;           // a failed get() then ends the loop (see unsigned_int)
            ds.get( b );
            if( shift < 32 )
                v |= uint32_t( uint8_t(b) & 0x7f ) << shift;
            shift += 7;
        } while( uint8_t(b) & 0x80 );
        // Inverse zigzag: even codes are n = v/2, odd codes are n = -(v+1)/2.
        // (0u - (v & 1u)) is all ones for odd v and zero for even v, which
        // stays correct when bit 31 of v is set (|n| >= 2^30).
        vi.value = static_cast<int32_t>( ( v >> 1 ) ^ ( 0u - ( v & 1u ) ) );
        return ds;
    }
};

#endif // VARINT_H
其中包括一些类型转换以及数据长度的计算,如此简单。