Hadoop基础-Apache Avro串行化的与反串行化

                      Hadoop基础-Apache Avro串行化的与反串行化

                                        作者:尹正杰

版权声明:原创作品,谢绝转载!否则将追究法律责任。

 

 

一.Apache Avro简介

1>.Apache Avro的来源

   Apache Avro 是一个中立性语言,它是有Hadoop之父Doug Cutting开发而来。因为hadoop的Writerable的串行化只支持Java语言,即非跨语言。所以Doug Cutting开发了Avro ,它是一个语言独立的数据结构,也就是说它是跨语言的。

2>.Avro特点

  Apache Avro™是一个数据序列化系统。它有以下特点:

    第一:丰富的数据结构。

    第二:紧凑,快速的二进制数据格式。

    第三:一个容器文件,用于存储持久性数据。

    第四:远程过程调用(RPC)。

    第五:与动态语言的简单集成。读取或写入数据文件不需要代码生成,也不需要使用或实现RPC协议。代码生成是一种可选的优化,只有静态类型语言才值得实现。

  想要了解更多Avro,可以参考Apache官网(http://avro.apache.org/docs/1.8.2/),我就懒得搬运了,直接上干货,本篇博客介绍了两种Avro序列化与反序列化的方式。

3>.安装Avro

  下载Avro:http://mirror.bit.edu.cn/apache/avro/avro-1.8.2/

二.Avro环境准备

  其实部署Avro的流程大致可以用下图来表示,配置起来相对来说还是蛮简单的,具体操作如下: 

1>.创建emp.avsc文件,内容如下 

 1 {
 2     "namespace": "tutorialspoint.com",
 3     "type": "record",
 4     "name": "Emp",
 5     "fields": [
 6     {"name": "name", "type": "string"},
 7     {"name": "id", "type": "int"},
 8     {"name": "salary", "type": "int"},
 9     {"name": "age", "type": "int"},
10     {"name": "address", "type": "string"}
11     ]
12 }

2>.将下载的avro-1.8.2.jar和avro-tools-1.8.2.jar文件放在emp.avsc同级目录

3>.编译schema文件

 

4>.查看文件内容

  1 /**
  2  * Autogenerated by Avro
  3  *
  4  * DO NOT EDIT DIRECTLY
  5  */
  6 package tutorialspoint.com;
  7 
  8 import org.apache.avro.specific.SpecificData;
  9 import org.apache.avro.message.BinaryMessageEncoder;
 10 import org.apache.avro.message.BinaryMessageDecoder;
 11 import org.apache.avro.message.SchemaStore;
 12 
 13 @SuppressWarnings("all")
 14 @org.apache.avro.specific.AvroGenerated
 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
 16   private static final long serialVersionUID = 6405205887550658768L;
 17   public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}");
 18   public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
 19 
 20   private static SpecificData MODEL$ = new SpecificData();
 21 
 22   private static final BinaryMessageEncoder<Emp> ENCODER =
 23       new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$);
 24 
 25   private static final BinaryMessageDecoder<Emp> DECODER =
 26       new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$);
 27 
 28   /**
 29    * Return the BinaryMessageDecoder instance used by this class.
 30    */
 31   public static BinaryMessageDecoder<Emp> getDecoder() {
 32     return DECODER;
 33   }
 34 
 35   /**
 36    * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
 37    * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
 38    */
 39   public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) {
 40     return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver);
 41   }
 42 
 43   /** Serializes this Emp to a ByteBuffer. */
 44   public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
 45     return ENCODER.encode(this);
 46   }
 47 
 48   /** Deserializes a Emp from a ByteBuffer. */
 49   public static Emp fromByteBuffer(
 50       java.nio.ByteBuffer b) throws java.io.IOException {
 51     return DECODER.decode(b);
 52   }
 53 
 54   @Deprecated public java.lang.CharSequence name;
 55   @Deprecated public int id;
 56   @Deprecated public int salary;
 57   @Deprecated public int age;
 58   @Deprecated public java.lang.CharSequence address;
 59 
 60   /**
 61    * Default constructor.  Note that this does not initialize fields
 62    * to their default values from the schema.  If that is desired then
 63    * one should use <code>newBuilder()</code>.
 64    */
 65   public Emp() {}
 66 
 67   /**
 68    * All-args constructor.
 69    * @param name The new value for name
 70    * @param id The new value for id
 71    * @param salary The new value for salary
 72    * @param age The new value for age
 73    * @param address The new value for address
 74    */
 75   public Emp(java.lang.CharSequence name, java.lang.Integer id, java.lang.Integer salary, java.lang.Integer age, java.lang.CharSequence address) {
 76     this.name = name;
 77     this.id = id;
 78     this.salary = salary;
 79     this.age = age;
 80     this.address = address;
 81   }
 82 
 83   public org.apache.avro.Schema getSchema() { return SCHEMA$; }
 84   // Used by DatumWriter.  Applications should not call.
 85   public java.lang.Object get(int field$) {
 86     switch (field$) {
 87     case 0: return name;
 88     case 1: return id;
 89     case 2: return salary;
 90     case 3: return age;
 91     case 4: return address;
 92     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
 93     }
 94   }
 95 
 96   // Used by DatumReader.  Applications should not call.
 97   @SuppressWarnings(value="unchecked")
 98   public void put(int field$, java.lang.Object value$) {
 99     switch (field$) {
100     case 0: name = (java.lang.CharSequence)value$; break;
101     case 1: id = (java.lang.Integer)value$; break;
102     case 2: salary = (java.lang.Integer)value$; break;
103     case 3: age = (java.lang.Integer)value$; break;
104     case 4: address = (java.lang.CharSequence)value$; break;
105     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
106     }
107   }
108 
109   /**
110    * Gets the value of the 'name' field.
111    * @return The value of the 'name' field.
112    */
113   public java.lang.CharSequence getName() {
114     return name;
115   }
116 
117   /**
118    * Sets the value of the 'name' field.
119    * @param value the value to set.
120    */
121   public void setName(java.lang.CharSequence value) {
122     this.name = value;
123   }
124 
125   /**
126    * Gets the value of the 'id' field.
127    * @return The value of the 'id' field.
128    */
129   public java.lang.Integer getId() {
130     return id;
131   }
132 
133   /**
134    * Sets the value of the 'id' field.
135    * @param value the value to set.
136    */
137   public void setId(java.lang.Integer value) {
138     this.id = value;
139   }
140 
141   /**
142    * Gets the value of the 'salary' field.
143    * @return The value of the 'salary' field.
144    */
145   public java.lang.Integer getSalary() {
146     return salary;
147   }
148 
149   /**
150    * Sets the value of the 'salary' field.
151    * @param value the value to set.
152    */
153   public void setSalary(java.lang.Integer value) {
154     this.salary = value;
155   }
156 
157   /**
158    * Gets the value of the 'age' field.
159    * @return The value of the 'age' field.
160    */
161   public java.lang.Integer getAge() {
162     return age;
163   }
164 
165   /**
166    * Sets the value of the 'age' field.
167    * @param value the value to set.
168    */
169   public void setAge(java.lang.Integer value) {
170     this.age = value;
171   }
172 
173   /**
174    * Gets the value of the 'address' field.
175    * @return The value of the 'address' field.
176    */
177   public java.lang.CharSequence getAddress() {
178     return address;
179   }
180 
181   /**
182    * Sets the value of the 'address' field.
183    * @param value the value to set.
184    */
185   public void setAddress(java.lang.CharSequence value) {
186     this.address = value;
187   }
188 
189   /**
190    * Creates a new Emp RecordBuilder.
191    * @return A new Emp RecordBuilder
192    */
193   public static tutorialspoint.com.Emp.Builder newBuilder() {
194     return new tutorialspoint.com.Emp.Builder();
195   }
196 
197   /**
198    * Creates a new Emp RecordBuilder by copying an existing Builder.
199    * @param other The existing builder to copy.
200    * @return A new Emp RecordBuilder
201    */
202   public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp.Builder other) {
203     return new tutorialspoint.com.Emp.Builder(other);
204   }
205 
206   /**
207    * Creates a new Emp RecordBuilder by copying an existing Emp instance.
208    * @param other The existing instance to copy.
209    * @return A new Emp RecordBuilder
210    */
211   public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp other) {
212     return new tutorialspoint.com.Emp.Builder(other);
213   }
214 
215   /**
216    * RecordBuilder for Emp instances.
217    */
218   public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp>
219     implements org.apache.avro.data.RecordBuilder<Emp> {
220 
221     private java.lang.CharSequence name;
222     private int id;
223     private int salary;
224     private int age;
225     private java.lang.CharSequence address;
226 
227     /** Creates a new Builder */
228     private Builder() {
229       super(SCHEMA$);
230     }
231 
232     /**
233      * Creates a Builder by copying an existing Builder.
234      * @param other The existing Builder to copy.
235      */
236     private Builder(tutorialspoint.com.Emp.Builder other) {
237       super(other);
238       if (isValidValue(fields()[0], other.name)) {
239         this.name = data().deepCopy(fields()[0].schema(), other.name);
240         fieldSetFlags()[0] = true;
241       }
242       if (isValidValue(fields()[1], other.id)) {
243         this.id = data().deepCopy(fields()[1].schema(), other.id);
244         fieldSetFlags()[1] = true;
245       }
246       if (isValidValue(fields()[2], other.salary)) {
247         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
248         fieldSetFlags()[2] = true;
249       }
250       if (isValidValue(fields()[3], other.age)) {
251         this.age = data().deepCopy(fields()[3].schema(), other.age);
252         fieldSetFlags()[3] = true;
253       }
254       if (isValidValue(fields()[4], other.address)) {
255         this.address = data().deepCopy(fields()[4].schema(), other.address);
256         fieldSetFlags()[4] = true;
257       }
258     }
259 
260     /**
261      * Creates a Builder by copying an existing Emp instance
262      * @param other The existing instance to copy.
263      */
264     private Builder(tutorialspoint.com.Emp other) {
265             super(SCHEMA$);
266       if (isValidValue(fields()[0], other.name)) {
267         this.name = data().deepCopy(fields()[0].schema(), other.name);
268         fieldSetFlags()[0] = true;
269       }
270       if (isValidValue(fields()[1], other.id)) {
271         this.id = data().deepCopy(fields()[1].schema(), other.id);
272         fieldSetFlags()[1] = true;
273       }
274       if (isValidValue(fields()[2], other.salary)) {
275         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
276         fieldSetFlags()[2] = true;
277       }
278       if (isValidValue(fields()[3], other.age)) {
279         this.age = data().deepCopy(fields()[3].schema(), other.age);
280         fieldSetFlags()[3] = true;
281       }
282       if (isValidValue(fields()[4], other.address)) {
283         this.address = data().deepCopy(fields()[4].schema(), other.address);
284         fieldSetFlags()[4] = true;
285       }
286     }
287 
288     /**
289       * Gets the value of the 'name' field.
290       * @return The value.
291       */
292     public java.lang.CharSequence getName() {
293       return name;
294     }
295 
296     /**
297       * Sets the value of the 'name' field.
298       * @param value The value of 'name'.
299       * @return This builder.
300       */
301     public tutorialspoint.com.Emp.Builder setName(java.lang.CharSequence value) {
302       validate(fields()[0], value);
303       this.name = value;
304       fieldSetFlags()[0] = true;
305       return this;
306     }
307 
308     /**
309       * Checks whether the 'name' field has been set.
310       * @return True if the 'name' field has been set, false otherwise.
311       */
312     public boolean hasName() {
313       return fieldSetFlags()[0];
314     }
315 
316 
317     /**
318       * Clears the value of the 'name' field.
319       * @return This builder.
320       */
321     public tutorialspoint.com.Emp.Builder clearName() {
322       name = null;
323       fieldSetFlags()[0] = false;
324       return this;
325     }
326 
327     /**
328       * Gets the value of the 'id' field.
329       * @return The value.
330       */
331     public java.lang.Integer getId() {
332       return id;
333     }
334 
335     /**
336       * Sets the value of the 'id' field.
337       * @param value The value of 'id'.
338       * @return This builder.
339       */
340     public tutorialspoint.com.Emp.Builder setId(int value) {
341       validate(fields()[1], value);
342       this.id = value;
343       fieldSetFlags()[1] = true;
344       return this;
345     }
346 
347     /**
348       * Checks whether the 'id' field has been set.
349       * @return True if the 'id' field has been set, false otherwise.
350       */
351     public boolean hasId() {
352       return fieldSetFlags()[1];
353     }
354 
355 
356     /**
357       * Clears the value of the 'id' field.
358       * @return This builder.
359       */
360     public tutorialspoint.com.Emp.Builder clearId() {
361       fieldSetFlags()[1] = false;
362       return this;
363     }
364 
365     /**
366       * Gets the value of the 'salary' field.
367       * @return The value.
368       */
369     public java.lang.Integer getSalary() {
370       return salary;
371     }
372 
373     /**
374       * Sets the value of the 'salary' field.
375       * @param value The value of 'salary'.
376       * @return This builder.
377       */
378     public tutorialspoint.com.Emp.Builder setSalary(int value) {
379       validate(fields()[2], value);
380       this.salary = value;
381       fieldSetFlags()[2] = true;
382       return this;
383     }
384 
385     /**
386       * Checks whether the 'salary' field has been set.
387       * @return True if the 'salary' field has been set, false otherwise.
388       */
389     public boolean hasSalary() {
390       return fieldSetFlags()[2];
391     }
392 
393 
394     /**
395       * Clears the value of the 'salary' field.
396       * @return This builder.
397       */
398     public tutorialspoint.com.Emp.Builder clearSalary() {
399       fieldSetFlags()[2] = false;
400       return this;
401     }
402 
403     /**
404       * Gets the value of the 'age' field.
405       * @return The value.
406       */
407     public java.lang.Integer getAge() {
408       return age;
409     }
410 
411     /**
412       * Sets the value of the 'age' field.
413       * @param value The value of 'age'.
414       * @return This builder.
415       */
416     public tutorialspoint.com.Emp.Builder setAge(int value) {
417       validate(fields()[3], value);
418       this.age = value;
419       fieldSetFlags()[3] = true;
420       return this;
421     }
422 
423     /**
424       * Checks whether the 'age' field has been set.
425       * @return True if the 'age' field has been set, false otherwise.
426       */
427     public boolean hasAge() {
428       return fieldSetFlags()[3];
429     }
430 
431 
432     /**
433       * Clears the value of the 'age' field.
434       * @return This builder.
435       */
436     public tutorialspoint.com.Emp.Builder clearAge() {
437       fieldSetFlags()[3] = false;
438       return this;
439     }
440 
441     /**
442       * Gets the value of the 'address' field.
443       * @return The value.
444       */
445     public java.lang.CharSequence getAddress() {
446       return address;
447     }
448 
449     /**
450       * Sets the value of the 'address' field.
451       * @param value The value of 'address'.
452       * @return This builder.
453       */
454     public tutorialspoint.com.Emp.Builder setAddress(java.lang.CharSequence value) {
455       validate(fields()[4], value);
456       this.address = value;
457       fieldSetFlags()[4] = true;
458       return this;
459     }
460 
461     /**
462       * Checks whether the 'address' field has been set.
463       * @return True if the 'address' field has been set, false otherwise.
464       */
465     public boolean hasAddress() {
466       return fieldSetFlags()[4];
467     }
468 
469 
470     /**
471       * Clears the value of the 'address' field.
472       * @return This builder.
473       */
474     public tutorialspoint.com.Emp.Builder clearAddress() {
475       address = null;
476       fieldSetFlags()[4] = false;
477       return this;
478     }
479 
480     @Override
481     @SuppressWarnings("unchecked")
482     public Emp build() {
483       try {
484         Emp record = new Emp();
485         record.name = fieldSetFlags()[0] ? this.name : (java.lang.CharSequence) defaultValue(fields()[0]);
486         record.id = fieldSetFlags()[1] ? this.id : (java.lang.Integer) defaultValue(fields()[1]);
487         record.salary = fieldSetFlags()[2] ? this.salary : (java.lang.Integer) defaultValue(fields()[2]);
488         record.age = fieldSetFlags()[3] ? this.age : (java.lang.Integer) defaultValue(fields()[3]);
489         record.address = fieldSetFlags()[4] ? this.address : (java.lang.CharSequence) defaultValue(fields()[4]);
490         return record;
491       } catch (java.lang.Exception e) {
492         throw new org.apache.avro.AvroRuntimeException(e);
493       }
494     }
495   }
496 
497   @SuppressWarnings("unchecked")
498   private static final org.apache.avro.io.DatumWriter<Emp>
499     WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$);
500 
501   @Override public void writeExternal(java.io.ObjectOutput out)
502     throws java.io.IOException {
503     WRITER$.write(this, SpecificData.getEncoder(out));
504   }
505 
506   @SuppressWarnings("unchecked")
507   private static final org.apache.avro.io.DatumReader<Emp>
508     READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$);
509 
510   @Override public void readExternal(java.io.ObjectInput in)
511     throws java.io.IOException {
512     READER$.read(this, SpecificData.getDecoder(in));
513   }
514 
515 }
Emp.java 文件内容

5>.将此文件加载到idea

   第一步:编辑pom.xml配置文件

 1 <?xml version="1.0" encoding="UTF-8"?>
 2 <project xmlns="http://maven.apache.org/POM/4.0.0"
 3          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5     <modelVersion>4.0.0</modelVersion>
 6 
 7     <groupId>com.yinzhengjie</groupId>
 8     <artifactId>myAvro-pb</artifactId>
 9     <version>1.0-SNAPSHOT</version>
10 
11     <dependencies>
12         <dependency>
13             <groupId>org.apache.avro</groupId>
14             <artifactId>avro</artifactId>
15             <version>1.8.2</version>
16         </dependency>
17         <dependency>
18             <groupId>org.apache.avro</groupId>
19             <artifactId>avro-tools</artifactId>
20             <version>1.8.2</version>
21         </dependency>
22         <dependency>
23             <groupId>junit</groupId>
24             <artifactId>junit</artifactId>
25             <version>4.12</version>
26         </dependency>
27     </dependencies>
28 </project>
pom.xml 文件内容

  第二步:新建包,名称tutorialspoint.com,并将Emp.java文件复制到包内,如有错误不建议直接修改。

  1 /**
  2  * Autogenerated by Avro
  3  *
  4  * DO NOT EDIT DIRECTLY
  5  */
  6 package tutorialspoint.com;
  7 
  8 import org.apache.avro.specific.SpecificData;
  9 import org.apache.avro.message.BinaryMessageEncoder;
 10 import org.apache.avro.message.BinaryMessageDecoder;
 11 import org.apache.avro.message.SchemaStore;
 12 
 13 @SuppressWarnings("all")
 14 @org.apache.avro.specific.AvroGenerated
 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
 16   private static final long serialVersionUID = 6405205887550658768L;
 17   public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}");
 18   public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
 19 
 20   private static SpecificData MODEL$ = new SpecificData();
 21 
 22   private static final BinaryMessageEncoder<Emp> ENCODER =
 23       new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$);
 24 
 25   private static final BinaryMessageDecoder<Emp> DECODER =
 26       new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$);
 27 
 28   /**
 29    * Return the BinaryMessageDecoder instance used by this class.
 30    */
 31   public static BinaryMessageDecoder<Emp> getDecoder() {
 32     return DECODER;
 33   }
 34 
 35   /**
 36    * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
 37    * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
 38    */
 39   public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) {
 40     return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver);
 41   }
 42 
 43   /** Serializes this Emp to a ByteBuffer. */
 44   public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
 45     return ENCODER.encode(this);
 46   }
 47 
 48   /** Deserializes a Emp from a ByteBuffer. */
 49   public static Emp fromByteBuffer(
 50       java.nio.ByteBuffer b) throws java.io.IOException {
 51     return DECODER.decode(b);
 52   }
 53 
 54   @Deprecated public CharSequence name;
 55   @Deprecated public int id;
 56   @Deprecated public int salary;
 57   @Deprecated public int age;
 58   @Deprecated public CharSequence address;
 59 
 60   /**
 61    * Default constructor.  Note that this does not initialize fields
 62    * to their default values from the schema.  If that is desired then
 63    * one should use <code>newBuilder()</code>.
 64    */
 65   public Emp() {}
 66 
 67   /**
 68    * All-args constructor.
 69    * @param name The new value for name
 70    * @param id The new value for id
 71    * @param salary The new value for salary
 72    * @param age The new value for age
 73    * @param address The new value for address
 74    */
 75   public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) {
 76     this.name = name;
 77     this.id = id;
 78     this.salary = salary;
 79     this.age = age;
 80     this.address = address;
 81   }
 82 
 83   public org.apache.avro.Schema getSchema() { return SCHEMA$; }
 84   // Used by DatumWriter.  Applications should not call.
 85   public Object get(int field$) {
 86     switch (field$) {
 87     case 0: return name;
 88     case 1: return id;
 89     case 2: return salary;
 90     case 3: return age;
 91     case 4: return address;
 92     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
 93     }
 94   }
 95 
 96   // Used by DatumReader.  Applications should not call.
 97   @SuppressWarnings(value="unchecked")
 98   public void put(int field$, Object value$) {
 99     switch (field$) {
100     case 0: name = (CharSequence)value$; break;
101     case 1: id = (Integer)value$; break;
102     case 2: salary = (Integer)value$; break;
103     case 3: age = (Integer)value$; break;
104     case 4: address = (CharSequence)value$; break;
105     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
106     }
107   }
108 
109   /**
110    * Gets the value of the 'name' field.
111    * @return The value of the 'name' field.
112    */
113   public CharSequence getName() {
114     return name;
115   }
116 
117   /**
118    * Sets the value of the 'name' field.
119    * @param value the value to set.
120    */
121   public void setName(CharSequence value) {
122     this.name = value;
123   }
124 
125   /**
126    * Gets the value of the 'id' field.
127    * @return The value of the 'id' field.
128    */
129   public Integer getId() {
130     return id;
131   }
132 
133   /**
134    * Sets the value of the 'id' field.
135    * @param value the value to set.
136    */
137   public void setId(Integer value) {
138     this.id = value;
139   }
140 
141   /**
142    * Gets the value of the 'salary' field.
143    * @return The value of the 'salary' field.
144    */
145   public Integer getSalary() {
146     return salary;
147   }
148 
149   /**
150    * Sets the value of the 'salary' field.
151    * @param value the value to set.
152    */
153   public void setSalary(Integer value) {
154     this.salary = value;
155   }
156 
157   /**
158    * Gets the value of the 'age' field.
159    * @return The value of the 'age' field.
160    */
161   public Integer getAge() {
162     return age;
163   }
164 
165   /**
166    * Sets the value of the 'age' field.
167    * @param value the value to set.
168    */
169   public void setAge(Integer value) {
170     this.age = value;
171   }
172 
173   /**
174    * Gets the value of the 'address' field.
175    * @return The value of the 'address' field.
176    */
177   public CharSequence getAddress() {
178     return address;
179   }
180 
181   /**
182    * Sets the value of the 'address' field.
183    * @param value the value to set.
184    */
185   public void setAddress(CharSequence value) {
186     this.address = value;
187   }
188 
189   /**
190    * Creates a new Emp RecordBuilder.
191    * @return A new Emp RecordBuilder
192    */
193   public static Builder newBuilder() {
194     return new Builder();
195   }
196 
197   /**
198    * Creates a new Emp RecordBuilder by copying an existing Builder.
199    * @param other The existing builder to copy.
200    * @return A new Emp RecordBuilder
201    */
202   public static Builder newBuilder(Builder other) {
203     return new Builder(other);
204   }
205 
206   /**
207    * Creates a new Emp RecordBuilder by copying an existing Emp instance.
208    * @param other The existing instance to copy.
209    * @return A new Emp RecordBuilder
210    */
211   public static Builder newBuilder(Emp other) {
212     return new Builder(other);
213   }
214 
215   /**
216    * RecordBuilder for Emp instances.
217    */
218   public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp>
219     implements org.apache.avro.data.RecordBuilder<Emp> {
220 
221     private CharSequence name;
222     private int id;
223     private int salary;
224     private int age;
225     private CharSequence address;
226 
227     /** Creates a new Builder */
228     private Builder() {
229       super(SCHEMA$);
230     }
231 
232     /**
233      * Creates a Builder by copying an existing Builder.
234      * @param other The existing Builder to copy.
235      */
236     private Builder(Builder other) {
237       super(other);
238       if (isValidValue(fields()[0], other.name)) {
239         this.name = data().deepCopy(fields()[0].schema(), other.name);
240         fieldSetFlags()[0] = true;
241       }
242       if (isValidValue(fields()[1], other.id)) {
243         this.id = data().deepCopy(fields()[1].schema(), other.id);
244         fieldSetFlags()[1] = true;
245       }
246       if (isValidValue(fields()[2], other.salary)) {
247         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
248         fieldSetFlags()[2] = true;
249       }
250       if (isValidValue(fields()[3], other.age)) {
251         this.age = data().deepCopy(fields()[3].schema(), other.age);
252         fieldSetFlags()[3] = true;
253       }
254       if (isValidValue(fields()[4], other.address)) {
255         this.address = data().deepCopy(fields()[4].schema(), other.address);
256         fieldSetFlags()[4] = true;
257       }
258     }
259 
260     /**
261      * Creates a Builder by copying an existing Emp instance
262      * @param other The existing instance to copy.
263      */
264     private Builder(Emp other) {
265             super(SCHEMA$);
266       if (isValidValue(fields()[0], other.name)) {
267         this.name = data().deepCopy(fields()[0].schema(), other.name);
268         fieldSetFlags()[0] = true;
269       }
270       if (isValidValue(fields()[1], other.id)) {
271         this.id = data().deepCopy(fields()[1].schema(), other.id);
272         fieldSetFlags()[1] = true;
273       }
274       if (isValidValue(fields()[2], other.salary)) {
275         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
276         fieldSetFlags()[2] = true;
277       }
278       if (isValidValue(fields()[3], other.age)) {
279         this.age = data().deepCopy(fields()[3].schema(), other.age);
280         fieldSetFlags()[3] = true;
281       }
282       if (isValidValue(fields()[4], other.address)) {
283         this.address = data().deepCopy(fields()[4].schema(), other.address);
284         fieldSetFlags()[4] = true;
285       }
286     }
287 
288     /**
289       * Gets the value of the 'name' field.
290       * @return The value.
291       */
292     public CharSequence getName() {
293       return name;
294     }
295 
296     /**
297       * Sets the value of the 'name' field.
298       * @param value The value of 'name'.
299       * @return This builder.
300       */
301     public Builder setName(CharSequence value) {
302       validate(fields()[0], value);
303       this.name = value;
304       fieldSetFlags()[0] = true;
305       return this;
306     }
307 
308     /**
309       * Checks whether the 'name' field has been set.
310       * @return True if the 'name' field has been set, false otherwise.
311       */
312     public boolean hasName() {
313       return fieldSetFlags()[0];
314     }
315 
316 
317     /**
318       * Clears the value of the 'name' field.
319       * @return This builder.
320       */
321     public Builder clearName() {
322       name = null;
323       fieldSetFlags()[0] = false;
324       return this;
325     }
326 
327     /**
328       * Gets the value of the 'id' field.
329       * @return The value.
330       */
331     public Integer getId() {
332       return id;
333     }
334 
335     /**
336       * Sets the value of the 'id' field.
337       * @param value The value of 'id'.
338       * @return This builder.
339       */
340     public Builder setId(int value) {
341       validate(fields()[1], value);
342       this.id = value;
343       fieldSetFlags()[1] = true;
344       return this;
345     }
346 
347     /**
348       * Checks whether the 'id' field has been set.
349       * @return True if the 'id' field has been set, false otherwise.
350       */
351     public boolean hasId() {
352       return fieldSetFlags()[1];
353     }
354 
355 
356     /**
357       * Clears the value of the 'id' field.
358       * @return This builder.
359       */
360     public Builder clearId() {
361       fieldSetFlags()[1] = false;
362       return this;
363     }
364 
365     /**
366       * Gets the value of the 'salary' field.
367       * @return The value.
368       */
369     public Integer getSalary() {
370       return salary;
371     }
372 
373     /**
374       * Sets the value of the 'salary' field.
375       * @param value The value of 'salary'.
376       * @return This builder.
377       */
378     public Builder setSalary(int value) {
379       validate(fields()[2], value);
380       this.salary = value;
381       fieldSetFlags()[2] = true;
382       return this;
383     }
384 
385     /**
386       * Checks whether the 'salary' field has been set.
387       * @return True if the 'salary' field has been set, false otherwise.
388       */
389     public boolean hasSalary() {
390       return fieldSetFlags()[2];
391     }
392 
393 
394     /**
395       * Clears the value of the 'salary' field.
396       * @return This builder.
397       */
398     public Builder clearSalary() {
399       fieldSetFlags()[2] = false;
400       return this;
401     }
402 
403     /**
404       * Gets the value of the 'age' field.
405       * @return The value.
406       */
407     public Integer getAge() {
408       return age;
409     }
410 
411     /**
412       * Sets the value of the 'age' field.
413       * @param value The value of 'age'.
414       * @return This builder.
415       */
416     public Builder setAge(int value) {
417       validate(fields()[3], value);
418       this.age = value;
419       fieldSetFlags()[3] = true;
420       return this;
421     }
422 
423     /**
424       * Checks whether the 'age' field has been set.
425       * @return True if the 'age' field has been set, false otherwise.
426       */
427     public boolean hasAge() {
428       return fieldSetFlags()[3];
429     }
430 
431 
432     /**
433       * Clears the value of the 'age' field.
434       * @return This builder.
435       */
436     public Builder clearAge() {
437       fieldSetFlags()[3] = false;
438       return this;
439     }
440 
441     /**
442       * Gets the value of the 'address' field.
443       * @return The value.
444       */
445     public CharSequence getAddress() {
446       return address;
447     }
448 
449     /**
450       * Sets the value of the 'address' field.
451       * @param value The value of 'address'.
452       * @return This builder.
453       */
454     public Builder setAddress(CharSequence value) {
455       validate(fields()[4], value);
456       this.address = value;
457       fieldSetFlags()[4] = true;
458       return this;
459     }
460 
461     /**
462       * Checks whether the 'address' field has been set.
463       * @return True if the 'address' field has been set, false otherwise.
464       */
465     public boolean hasAddress() {
466       return fieldSetFlags()[4];
467     }
468 
469 
470     /**
471       * Clears the value of the 'address' field.
472       * @return This builder.
473       */
474     public Builder clearAddress() {
475       address = null;
476       fieldSetFlags()[4] = false;
477       return this;
478     }
479 
480 
481     @SuppressWarnings("unchecked")
482     public Emp build() {
483       try {
484         Emp record = new Emp();
485         record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);
486         record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);
487         record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);
488         record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);
489         record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);
490         return record;
491       } catch (Exception e) {
492         throw new org.apache.avro.AvroRuntimeException(e);
493       }
494     }
495   }
496 
497   @SuppressWarnings("unchecked")
498   private static final org.apache.avro.io.DatumWriter<Emp>
499     WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$);
500 
501   @Override public void writeExternal(java.io.ObjectOutput out)
502     throws java.io.IOException {
503     WRITER$.write(this, SpecificData.getEncoder(out));
504   }
505 
506   @SuppressWarnings("unchecked")
507   private static final org.apache.avro.io.DatumReader<Emp>
508     READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$);
509 
510   @Override public void readExternal(java.io.ObjectInput in)
511     throws java.io.IOException {
512     READER$.read(this, SpecificData.getDecoder(in));
513   }
514 
515 }
Emp.java(删除第480行的“@Override”)

 

三.进行Avro编程

1>.开始编写串行化代码

/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.avro;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;
import tutorialspoint.com.Emp;

import java.io.File;
import java.io.IOException;

public class TestAvro {

    /**
     * 测试Avro序列化
     */
    @Test
    public void testAvroSerial() throws IOException {
        //定义对象,并给对象赋初值
        Emp yinzhengjie = new Emp();
        yinzhengjie.setId(1);
        yinzhengjie.setName("尹正杰");
        yinzhengjie.setAge(18);
        yinzhengjie.setSalary(80000);
        yinzhengjie.setAddress("北京");

        //初始化writer对象,我习惯称它为写入器
        DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class);
        //初始化文件写入器
        DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw);
        //开始序列化文件,这个 Emp.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。
        dfw.create(Emp.SCHEMA$,new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avro"));
        //在序列化文件中追加对象
        dfw.append(yinzhengjie);
        //释放资源
        dfw.close();
        System.out.println("序列化成功!");
    }
}

2>.测试java、hadoop、avro对10000000个对象串行化速度比对,大小比对

  1 /**
  2  * Autogenerated by Avro
  3  *
  4  * DO NOT EDIT DIRECTLY
  5  */
  6 package tutorialspoint.com;
  7 
  8 import org.apache.avro.specific.SpecificData;
  9 import org.apache.avro.message.BinaryMessageEncoder;
 10 import org.apache.avro.message.BinaryMessageDecoder;
 11 import org.apache.avro.message.SchemaStore;
 12 
 13 @SuppressWarnings("all")
 14 @org.apache.avro.specific.AvroGenerated
 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
 16   private static final long serialVersionUID = 6405205887550658768L;
 17   public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}");
 18   public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
 19 
 20   private static SpecificData MODEL$ = new SpecificData();
 21 
 22   private static final BinaryMessageEncoder<Emp> ENCODER =
 23       new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$);
 24 
 25   private static final BinaryMessageDecoder<Emp> DECODER =
 26       new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$);
 27 
 28   /**
 29    * Return the BinaryMessageDecoder instance used by this class.
 30    */
 31   public static BinaryMessageDecoder<Emp> getDecoder() {
 32     return DECODER;
 33   }
 34 
 35   /**
 36    * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
 37    * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
 38    */
 39   public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) {
 40     return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver);
 41   }
 42 
 43   /** Serializes this Emp to a ByteBuffer. */
 44   public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
 45     return ENCODER.encode(this);
 46   }
 47 
 48   /** Deserializes a Emp from a ByteBuffer. */
 49   public static Emp fromByteBuffer(
 50       java.nio.ByteBuffer b) throws java.io.IOException {
 51     return DECODER.decode(b);
 52   }
 53 
 54   @Deprecated public CharSequence name;
 55   @Deprecated public int id;
 56   @Deprecated public int salary;
 57   @Deprecated public int age;
 58   @Deprecated public CharSequence address;
 59 
 60   /**
 61    * Default constructor.  Note that this does not initialize fields
 62    * to their default values from the schema.  If that is desired then
 63    * one should use <code>newBuilder()</code>.
 64    */
 65   public Emp() {}
 66 
 67   /**
 68    * All-args constructor.
 69    * @param name The new value for name
 70    * @param id The new value for id
 71    * @param salary The new value for salary
 72    * @param age The new value for age
 73    * @param address The new value for address
 74    */
 75   public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) {
 76     this.name = name;
 77     this.id = id;
 78     this.salary = salary;
 79     this.age = age;
 80     this.address = address;
 81   }
 82 
 83   public org.apache.avro.Schema getSchema() { return SCHEMA$; }
 84   // Used by DatumWriter.  Applications should not call.
 85   public Object get(int field$) {
 86     switch (field$) {
 87     case 0: return name;
 88     case 1: return id;
 89     case 2: return salary;
 90     case 3: return age;
 91     case 4: return address;
 92     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
 93     }
 94   }
 95 
 96   // Used by DatumReader.  Applications should not call.
 97   @SuppressWarnings(value="unchecked")
 98   public void put(int field$, Object value$) {
 99     switch (field$) {
100     case 0: name = (CharSequence)value$; break;
101     case 1: id = (Integer)value$; break;
102     case 2: salary = (Integer)value$; break;
103     case 3: age = (Integer)value$; break;
104     case 4: address = (CharSequence)value$; break;
105     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
106     }
107   }
108 
109   /**
110    * Gets the value of the 'name' field.
111    * @return The value of the 'name' field.
112    */
113   public CharSequence getName() {
114     return name;
115   }
116 
117   /**
118    * Sets the value of the 'name' field.
119    * @param value the value to set.
120    */
121   public void setName(CharSequence value) {
122     this.name = value;
123   }
124 
125   /**
126    * Gets the value of the 'id' field.
127    * @return The value of the 'id' field.
128    */
129   public Integer getId() {
130     return id;
131   }
132 
133   /**
134    * Sets the value of the 'id' field.
135    * @param value the value to set.
136    */
137   public void setId(Integer value) {
138     this.id = value;
139   }
140 
141   /**
142    * Gets the value of the 'salary' field.
143    * @return The value of the 'salary' field.
144    */
145   public Integer getSalary() {
146     return salary;
147   }
148 
149   /**
150    * Sets the value of the 'salary' field.
151    * @param value the value to set.
152    */
153   public void setSalary(Integer value) {
154     this.salary = value;
155   }
156 
157   /**
158    * Gets the value of the 'age' field.
159    * @return The value of the 'age' field.
160    */
161   public Integer getAge() {
162     return age;
163   }
164 
165   /**
166    * Sets the value of the 'age' field.
167    * @param value the value to set.
168    */
169   public void setAge(Integer value) {
170     this.age = value;
171   }
172 
173   /**
174    * Gets the value of the 'address' field.
175    * @return The value of the 'address' field.
176    */
177   public CharSequence getAddress() {
178     return address;
179   }
180 
181   /**
182    * Sets the value of the 'address' field.
183    * @param value the value to set.
184    */
185   public void setAddress(CharSequence value) {
186     this.address = value;
187   }
188 
189   /**
190    * Creates a new Emp RecordBuilder.
191    * @return A new Emp RecordBuilder
192    */
193   public static Builder newBuilder() {
194     return new Builder();
195   }
196 
197   /**
198    * Creates a new Emp RecordBuilder by copying an existing Builder.
199    * @param other The existing builder to copy.
200    * @return A new Emp RecordBuilder
201    */
202   public static Builder newBuilder(Builder other) {
203     return new Builder(other);
204   }
205 
206   /**
207    * Creates a new Emp RecordBuilder by copying an existing Emp instance.
208    * @param other The existing instance to copy.
209    * @return A new Emp RecordBuilder
210    */
211   public static Builder newBuilder(Emp other) {
212     return new Builder(other);
213   }
214 
215   /**
216    * RecordBuilder for Emp instances.
217    */
218   public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp>
219     implements org.apache.avro.data.RecordBuilder<Emp> {
220 
221     private CharSequence name;
222     private int id;
223     private int salary;
224     private int age;
225     private CharSequence address;
226 
227     /** Creates a new Builder */
228     private Builder() {
229       super(SCHEMA$);
230     }
231 
232     /**
233      * Creates a Builder by copying an existing Builder.
234      * @param other The existing Builder to copy.
235      */
236     private Builder(Builder other) {
237       super(other);
238       if (isValidValue(fields()[0], other.name)) {
239         this.name = data().deepCopy(fields()[0].schema(), other.name);
240         fieldSetFlags()[0] = true;
241       }
242       if (isValidValue(fields()[1], other.id)) {
243         this.id = data().deepCopy(fields()[1].schema(), other.id);
244         fieldSetFlags()[1] = true;
245       }
246       if (isValidValue(fields()[2], other.salary)) {
247         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
248         fieldSetFlags()[2] = true;
249       }
250       if (isValidValue(fields()[3], other.age)) {
251         this.age = data().deepCopy(fields()[3].schema(), other.age);
252         fieldSetFlags()[3] = true;
253       }
254       if (isValidValue(fields()[4], other.address)) {
255         this.address = data().deepCopy(fields()[4].schema(), other.address);
256         fieldSetFlags()[4] = true;
257       }
258     }
259 
260     /**
261      * Creates a Builder by copying an existing Emp instance
262      * @param other The existing instance to copy.
263      */
264     private Builder(Emp other) {
265             super(SCHEMA$);
266       if (isValidValue(fields()[0], other.name)) {
267         this.name = data().deepCopy(fields()[0].schema(), other.name);
268         fieldSetFlags()[0] = true;
269       }
270       if (isValidValue(fields()[1], other.id)) {
271         this.id = data().deepCopy(fields()[1].schema(), other.id);
272         fieldSetFlags()[1] = true;
273       }
274       if (isValidValue(fields()[2], other.salary)) {
275         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
276         fieldSetFlags()[2] = true;
277       }
278       if (isValidValue(fields()[3], other.age)) {
279         this.age = data().deepCopy(fields()[3].schema(), other.age);
280         fieldSetFlags()[3] = true;
281       }
282       if (isValidValue(fields()[4], other.address)) {
283         this.address = data().deepCopy(fields()[4].schema(), other.address);
284         fieldSetFlags()[4] = true;
285       }
286     }
287 
288     /**
289       * Gets the value of the 'name' field.
290       * @return The value.
291       */
292     public CharSequence getName() {
293       return name;
294     }
295 
296     /**
297       * Sets the value of the 'name' field.
298       * @param value The value of 'name'.
299       * @return This builder.
300       */
301     public Builder setName(CharSequence value) {
302       validate(fields()[0], value);
303       this.name = value;
304       fieldSetFlags()[0] = true;
305       return this;
306     }
307 
308     /**
309       * Checks whether the 'name' field has been set.
310       * @return True if the 'name' field has been set, false otherwise.
311       */
312     public boolean hasName() {
313       return fieldSetFlags()[0];
314     }
315 
316 
317     /**
318       * Clears the value of the 'name' field.
319       * @return This builder.
320       */
321     public Builder clearName() {
322       name = null;
323       fieldSetFlags()[0] = false;
324       return this;
325     }
326 
327     /**
328       * Gets the value of the 'id' field.
329       * @return The value.
330       */
331     public Integer getId() {
332       return id;
333     }
334 
335     /**
336       * Sets the value of the 'id' field.
337       * @param value The value of 'id'.
338       * @return This builder.
339       */
340     public Builder setId(int value) {
341       validate(fields()[1], value);
342       this.id = value;
343       fieldSetFlags()[1] = true;
344       return this;
345     }
346 
347     /**
348       * Checks whether the 'id' field has been set.
349       * @return True if the 'id' field has been set, false otherwise.
350       */
351     public boolean hasId() {
352       return fieldSetFlags()[1];
353     }
354 
355 
356     /**
357       * Clears the value of the 'id' field.
358       * @return This builder.
359       */
360     public Builder clearId() {
361       fieldSetFlags()[1] = false;
362       return this;
363     }
364 
365     /**
366       * Gets the value of the 'salary' field.
367       * @return The value.
368       */
369     public Integer getSalary() {
370       return salary;
371     }
372 
373     /**
374       * Sets the value of the 'salary' field.
375       * @param value The value of 'salary'.
376       * @return This builder.
377       */
378     public Builder setSalary(int value) {
379       validate(fields()[2], value);
380       this.salary = value;
381       fieldSetFlags()[2] = true;
382       return this;
383     }
384 
385     /**
386       * Checks whether the 'salary' field has been set.
387       * @return True if the 'salary' field has been set, false otherwise.
388       */
389     public boolean hasSalary() {
390       return fieldSetFlags()[2];
391     }
392 
393 
394     /**
395       * Clears the value of the 'salary' field.
396       * @return This builder.
397       */
398     public Builder clearSalary() {
399       fieldSetFlags()[2] = false;
400       return this;
401     }
402 
403     /**
404       * Gets the value of the 'age' field.
405       * @return The value.
406       */
407     public Integer getAge() {
408       return age;
409     }
410 
411     /**
412       * Sets the value of the 'age' field.
413       * @param value The value of 'age'.
414       * @return This builder.
415       */
416     public Builder setAge(int value) {
417       validate(fields()[3], value);
418       this.age = value;
419       fieldSetFlags()[3] = true;
420       return this;
421     }
422 
423     /**
424       * Checks whether the 'age' field has been set.
425       * @return True if the 'age' field has been set, false otherwise.
426       */
427     public boolean hasAge() {
428       return fieldSetFlags()[3];
429     }
430 
431 
432     /**
433       * Clears the value of the 'age' field.
434       * @return This builder.
435       */
436     public Builder clearAge() {
437       fieldSetFlags()[3] = false;
438       return this;
439     }
440 
441     /**
442       * Gets the value of the 'address' field.
443       * @return The value.
444       */
445     public CharSequence getAddress() {
446       return address;
447     }
448 
449     /**
450       * Sets the value of the 'address' field.
451       * @param value The value of 'address'.
452       * @return This builder.
453       */
454     public Builder setAddress(CharSequence value) {
455       validate(fields()[4], value);
456       this.address = value;
457       fieldSetFlags()[4] = true;
458       return this;
459     }
460 
461     /**
462       * Checks whether the 'address' field has been set.
463       * @return True if the 'address' field has been set, false otherwise.
464       */
465     public boolean hasAddress() {
466       return fieldSetFlags()[4];
467     }
468 
469 
470     /**
471       * Clears the value of the 'address' field.
472       * @return This builder.
473       */
474     public Builder clearAddress() {
475       address = null;
476       fieldSetFlags()[4] = false;
477       return this;
478     }
479 
480 
481     @SuppressWarnings("unchecked")
482     public Emp build() {
483       try {
484         Emp record = new Emp();
485         record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);
486         record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);
487         record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);
488         record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);
489         record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);
490         return record;
491       } catch (Exception e) {
492         throw new org.apache.avro.AvroRuntimeException(e);
493       }
494     }
495   }
496 
497   @SuppressWarnings("unchecked")
498   private static final org.apache.avro.io.DatumWriter<Emp>
499     WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$);
500 
501   @Override public void writeExternal(java.io.ObjectOutput out)
502     throws java.io.IOException {
503     WRITER$.write(this, SpecificData.getEncoder(out));
504   }
505 
506   @SuppressWarnings("unchecked")
507   private static final org.apache.avro.io.DatumReader<Emp>
508     READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$);
509 
510   @Override public void readExternal(java.io.ObjectInput in)
511     throws java.io.IOException {
512     READER$.read(this, SpecificData.getDecoder(in));
513   }
514 
515 }
Emp.java 文件内容
 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import java.io.Serializable;
 9 
10 public class Emp2 implements Serializable {
11     private int id;
12     private String name;
13     private int age;
14     private String address;
15     private int salary;
16 
17     public int getId() {
18         return id;
19     }
20     public void setId(int id) {
21         this.id = id;
22     }
23     public String getName() {
24         return name;
25     }
26     public void setName(String name) {
27         this.name = name;
28     }
29     public int getAge() {
30         return age;
31     }
32     public void setAge(int age) {
33         this.age = age;
34     }
35     public String getAddress() {
36         return address;
37     }
38     public void setAddress(String address) {
39         this.address = address;
40     }
41     public int getSalary() {
42         return salary;
43     }
44     public void setSalary(int salary) {
45         this.salary = salary;
46     }
47 }
Emp2.java 文件内容
 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import org.apache.hadoop.io.Writable;
 9 import tutorialspoint.com.Emp;
10 
11 import java.io.DataInput;
12 import java.io.DataOutput;
13 import java.io.IOException;
14 
15 public class EmpWritable implements Writable {
16     private Emp emp;
17 
18     public Emp getEmp() {
19         return emp;
20     }
21 
22     public void setEmp(Emp emp) {
23         this.emp = emp;
24 
25     }
26 
27 
28     public EmpWritable() {
29         this.emp = emp;
30     }
31 
32     //串行化
33     public void write(DataOutput dataOutput) throws IOException {
34         dataOutput.writeUTF((String)emp.getName());
35         dataOutput.writeInt(emp.getId());
36         dataOutput.writeInt(emp.getSalary());
37         dataOutput.writeInt(emp.getAge());
38         dataOutput.writeUTF((String)emp.getAddress());
39     }
40 
41     //发串行化,我们测试的是串行化,咱们不需要写反串行化的代码,空实现即可。
42     public void readFields(DataInput dataInput) {
43     }
44 }
EmpWritable.java 文件内容
/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.avro;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import tutorialspoint.com.Emp;

import java.io.*;

public class TestAvro {
   private static  final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest");
   private static final File java = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.javaTest");
   private static final File hadoop = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.hadoopTest");

    public static void main(String[] args) throws IOException {
        testAvroSerial();
        testJavaSerial();
        testHadoopSerial();
    }

    //测试hadoop的序列化方式
    public static void testHadoopSerial() throws IOException {
        long start = System.currentTimeMillis();
        //定义对象,并给对象赋初值
        Emp p = new Emp();
        p.setId(3);
        p.setName("jay");
        p.setAge(30);
        p.setSalary(15000);
        p.setAddress("昌平");
        EmpWritable ew = new EmpWritable();
        ew.setEmp(p);
        FileOutputStream fos = new FileOutputStream(hadoop);
        DataOutputStream dos = new DataOutputStream(fos);
        for (int i = 0;i <= 10000000;i++){
            ew.write(dos);
        }
        fos.close();
        dos.close();
        System.out.printf("这是Hadoop序列化方式: 生成文件大小:[%d],用时:[%d]\n",hadoop.length(),System.currentTimeMillis() - start);
    }



    //测试Avro序列化
    public static void testAvroSerial() throws IOException {
        long start = System.currentTimeMillis();
        //定义对象,并给对象赋初值
        Emp yinzhengjie = new Emp();
        yinzhengjie.setId(1);
        yinzhengjie.setName("尹正杰");
        yinzhengjie.setAge(18);
        yinzhengjie.setSalary(80000);
        yinzhengjie.setAddress("北京");

        //初始化writer对象,我习惯称它为写入器
        DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class);
        //初始化文件写入器
        DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw);
        //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。
        dfw.create(Emp.SCHEMA$,avro);
        //在序列化文件中追加对象
        for (int i = 0;i <= 10000000;i++){
            dfw.append(yinzhengjie);
        }
        //释放资源
        dfw.close();
        System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
    }

   //测试Java序列化
    public static void testJavaSerial() throws IOException {
        long start = System.currentTimeMillis();
        Emp2 p = new Emp2();
        p.setId(2);
        p.setName("tom");
        p.setAge(20);
        p.setSalary(13000);
        p.setAddress("亦庄");
        FileOutputStream fos = new FileOutputStream(java);
        ObjectOutputStream oos = new ObjectOutputStream(fos);
        for (int i = 0;i <= 10000000;i++){
            oos.writeObject(p);
        }
        fos.close();
        oos.close();
        System.out.printf("这是Java序列化方式: 生成文件大小:[%d],用时:[%d]\n",java.length(),(System.currentTimeMillis()-start));
    }
}


/*
以上代码执行结果如下:
这是Avro序列化方式: 生成文件大小:[220072462],用时:[2570]
这是Java序列化方式: 生成文件大小:[50000139],用时:[9876]
这是Hadoop序列化方式: 生成文件大小:[250000025],用时:[126290]
 */

  经过测试对一个,序列化通一个对象时,用时最短的是Avro,用时最长的是Hadoop,生成文件最小的是Java,生成文件最大是Hadoop。

3>.编写反串行化代码

 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import org.apache.avro.file.DataFileReader;
 9 import org.apache.avro.file.DataFileWriter;
10 import org.apache.avro.io.DatumReader;
11 import org.apache.avro.io.DatumWriter;
12 import org.apache.avro.specific.SpecificDatumReader;
13 import org.apache.avro.specific.SpecificDatumWriter;
14 import tutorialspoint.com.Emp;
15 
16 import java.io.*;
17 
18 public class TestAvro {
19    private static  final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest");
20 
21     public static void main(String[] args) throws IOException {
22         AvroSerial();
23         AvroDeserial();
24     }
25 
26     //测试Avro序列化
27     public static void AvroSerial() throws IOException {
28         long start = System.currentTimeMillis();
29         //定义对象,并给对象赋初值
30         Emp yinzhengjie = new Emp();
31         yinzhengjie.setId(1);
32         yinzhengjie.setName("尹正杰");
33         yinzhengjie.setAge(18);
34         yinzhengjie.setSalary(80000);
35         yinzhengjie.setAddress("北京");
36         //初始化writer对象,我习惯称它为写入器
37         DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class);
38         //初始化文件写入器
39         DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw);
40         //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。
41         dfw.create(Emp.SCHEMA$,avro);
42         //在序列化文件中追加对象
43         for (int i = 0;i <= 10000000;i++){
44             dfw.append(yinzhengjie);
45         }
46         //释放资源
47         dfw.close();
48         System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
49     }
50 
51 
52     //测试Avro反序列化
53     public static void AvroDeserial() throws IOException {
54         long start = System.currentTimeMillis();
55         //初始化reader对象
56         DatumReader<Emp> dr = new SpecificDatumReader<Emp>(Emp.class);
57         //初始化文件阅读器
58         DataFileReader<Emp> dfr = new DataFileReader<Emp>(avro,dr);
59         //遍历阅读器里面的内容
60         while (dfr.hasNext()){
61             Emp emp = dfr.next();
62             Integer id = emp.getId();
63             CharSequence name = emp.getName();
64             Integer age = emp.getAge();
65             Integer salary = emp.getSalary();
66             CharSequence address = emp.getAddress();
67 //            System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" + address);       不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。
68         }
69         System.out.printf("这是Avro反序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
70     }
71 
72 }
73 
74 
75 /*
76 以上代码执行结果如下:
77 这是Avro序列化方式: 生成文件大小:[220072462],用时:[2640]
78 这是Avro反序列化方式: 生成文件大小:[220072462],用时:[2866]
79  */

4>.测试java、hadoop、avro对10000000个对象反串行化速度比对

  1 /**
  2  * Autogenerated by Avro
  3  *
  4  * DO NOT EDIT DIRECTLY
  5  */
  6 package tutorialspoint.com;
  7 
  8 import org.apache.avro.specific.SpecificData;
  9 import org.apache.avro.message.BinaryMessageEncoder;
 10 import org.apache.avro.message.BinaryMessageDecoder;
 11 import org.apache.avro.message.SchemaStore;
 12 
 13 @SuppressWarnings("all")
 14 @org.apache.avro.specific.AvroGenerated
 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
 16   private static final long serialVersionUID = 6405205887550658768L;
 17   public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}");
 18   public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
 19 
 20   private static SpecificData MODEL$ = new SpecificData();
 21 
 22   private static final BinaryMessageEncoder<Emp> ENCODER =
 23       new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$);
 24 
 25   private static final BinaryMessageDecoder<Emp> DECODER =
 26       new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$);
 27 
 28   /**
 29    * Return the BinaryMessageDecoder instance used by this class.
 30    */
 31   public static BinaryMessageDecoder<Emp> getDecoder() {
 32     return DECODER;
 33   }
 34 
 35   /**
 36    * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
 37    * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
 38    */
 39   public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) {
 40     return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver);
 41   }
 42 
 43   /** Serializes this Emp to a ByteBuffer. */
 44   public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
 45     return ENCODER.encode(this);
 46   }
 47 
 48   /** Deserializes a Emp from a ByteBuffer. */
 49   public static Emp fromByteBuffer(
 50       java.nio.ByteBuffer b) throws java.io.IOException {
 51     return DECODER.decode(b);
 52   }
 53 
 54   @Deprecated public CharSequence name;
 55   @Deprecated public int id;
 56   @Deprecated public int salary;
 57   @Deprecated public int age;
 58   @Deprecated public CharSequence address;
 59 
 60   /**
 61    * Default constructor.  Note that this does not initialize fields
 62    * to their default values from the schema.  If that is desired then
 63    * one should use <code>newBuilder()</code>.
 64    */
 65   public Emp() {}
 66 
 67   /**
 68    * All-args constructor.
 69    * @param name The new value for name
 70    * @param id The new value for id
 71    * @param salary The new value for salary
 72    * @param age The new value for age
 73    * @param address The new value for address
 74    */
 75   public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) {
 76     this.name = name;
 77     this.id = id;
 78     this.salary = salary;
 79     this.age = age;
 80     this.address = address;
 81   }
 82 
 83   public org.apache.avro.Schema getSchema() { return SCHEMA$; }
 84   // Used by DatumWriter.  Applications should not call.
 85   public Object get(int field$) {
 86     switch (field$) {
 87     case 0: return name;
 88     case 1: return id;
 89     case 2: return salary;
 90     case 3: return age;
 91     case 4: return address;
 92     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
 93     }
 94   }
 95 
 96   // Used by DatumReader.  Applications should not call.
 97   @SuppressWarnings(value="unchecked")
 98   public void put(int field$, Object value$) {
 99     switch (field$) {
100     case 0: name = (CharSequence)value$; break;
101     case 1: id = (Integer)value$; break;
102     case 2: salary = (Integer)value$; break;
103     case 3: age = (Integer)value$; break;
104     case 4: address = (CharSequence)value$; break;
105     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
106     }
107   }
108 
109   /**
110    * Gets the value of the 'name' field.
111    * @return The value of the 'name' field.
112    */
113   public CharSequence getName() {
114     return name;
115   }
116 
117   /**
118    * Sets the value of the 'name' field.
119    * @param value the value to set.
120    */
121   public void setName(CharSequence value) {
122     this.name = value;
123   }
124 
125   /**
126    * Gets the value of the 'id' field.
127    * @return The value of the 'id' field.
128    */
129   public Integer getId() {
130     return id;
131   }
132 
133   /**
134    * Sets the value of the 'id' field.
135    * @param value the value to set.
136    */
137   public void setId(Integer value) {
138     this.id = value;
139   }
140 
141   /**
142    * Gets the value of the 'salary' field.
143    * @return The value of the 'salary' field.
144    */
145   public Integer getSalary() {
146     return salary;
147   }
148 
149   /**
150    * Sets the value of the 'salary' field.
151    * @param value the value to set.
152    */
153   public void setSalary(Integer value) {
154     this.salary = value;
155   }
156 
157   /**
158    * Gets the value of the 'age' field.
159    * @return The value of the 'age' field.
160    */
161   public Integer getAge() {
162     return age;
163   }
164 
165   /**
166    * Sets the value of the 'age' field.
167    * @param value the value to set.
168    */
169   public void setAge(Integer value) {
170     this.age = value;
171   }
172 
173   /**
174    * Gets the value of the 'address' field.
175    * @return The value of the 'address' field.
176    */
177   public CharSequence getAddress() {
178     return address;
179   }
180 
181   /**
182    * Sets the value of the 'address' field.
183    * @param value the value to set.
184    */
185   public void setAddress(CharSequence value) {
186     this.address = value;
187   }
188 
189   /**
190    * Creates a new Emp RecordBuilder.
191    * @return A new Emp RecordBuilder
192    */
193   public static Builder newBuilder() {
194     return new Builder();
195   }
196 
197   /**
198    * Creates a new Emp RecordBuilder by copying an existing Builder.
199    * @param other The existing builder to copy.
200    * @return A new Emp RecordBuilder
201    */
202   public static Builder newBuilder(Builder other) {
203     return new Builder(other);
204   }
205 
206   /**
207    * Creates a new Emp RecordBuilder by copying an existing Emp instance.
208    * @param other The existing instance to copy.
209    * @return A new Emp RecordBuilder
210    */
211   public static Builder newBuilder(Emp other) {
212     return new Builder(other);
213   }
214 
215   /**
216    * RecordBuilder for Emp instances.
217    */
218   public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp>
219     implements org.apache.avro.data.RecordBuilder<Emp> {
220 
221     private CharSequence name;
222     private int id;
223     private int salary;
224     private int age;
225     private CharSequence address;
226 
227     /** Creates a new Builder */
228     private Builder() {
229       super(SCHEMA$);
230     }
231 
232     /**
233      * Creates a Builder by copying an existing Builder.
234      * @param other The existing Builder to copy.
235      */
236     private Builder(Builder other) {
237       super(other);
238       if (isValidValue(fields()[0], other.name)) {
239         this.name = data().deepCopy(fields()[0].schema(), other.name);
240         fieldSetFlags()[0] = true;
241       }
242       if (isValidValue(fields()[1], other.id)) {
243         this.id = data().deepCopy(fields()[1].schema(), other.id);
244         fieldSetFlags()[1] = true;
245       }
246       if (isValidValue(fields()[2], other.salary)) {
247         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
248         fieldSetFlags()[2] = true;
249       }
250       if (isValidValue(fields()[3], other.age)) {
251         this.age = data().deepCopy(fields()[3].schema(), other.age);
252         fieldSetFlags()[3] = true;
253       }
254       if (isValidValue(fields()[4], other.address)) {
255         this.address = data().deepCopy(fields()[4].schema(), other.address);
256         fieldSetFlags()[4] = true;
257       }
258     }
259 
260     /**
261      * Creates a Builder by copying an existing Emp instance
262      * @param other The existing instance to copy.
263      */
264     private Builder(Emp other) {
265             super(SCHEMA$);
266       if (isValidValue(fields()[0], other.name)) {
267         this.name = data().deepCopy(fields()[0].schema(), other.name);
268         fieldSetFlags()[0] = true;
269       }
270       if (isValidValue(fields()[1], other.id)) {
271         this.id = data().deepCopy(fields()[1].schema(), other.id);
272         fieldSetFlags()[1] = true;
273       }
274       if (isValidValue(fields()[2], other.salary)) {
275         this.salary = data().deepCopy(fields()[2].schema(), other.salary);
276         fieldSetFlags()[2] = true;
277       }
278       if (isValidValue(fields()[3], other.age)) {
279         this.age = data().deepCopy(fields()[3].schema(), other.age);
280         fieldSetFlags()[3] = true;
281       }
282       if (isValidValue(fields()[4], other.address)) {
283         this.address = data().deepCopy(fields()[4].schema(), other.address);
284         fieldSetFlags()[4] = true;
285       }
286     }
287 
288     /**
289       * Gets the value of the 'name' field.
290       * @return The value.
291       */
292     public CharSequence getName() {
293       return name;
294     }
295 
296     /**
297       * Sets the value of the 'name' field.
298       * @param value The value of 'name'.
299       * @return This builder.
300       */
301     public Builder setName(CharSequence value) {
302       validate(fields()[0], value);
303       this.name = value;
304       fieldSetFlags()[0] = true;
305       return this;
306     }
307 
308     /**
309       * Checks whether the 'name' field has been set.
310       * @return True if the 'name' field has been set, false otherwise.
311       */
312     public boolean hasName() {
313       return fieldSetFlags()[0];
314     }
315 
316 
317     /**
318       * Clears the value of the 'name' field.
319       * @return This builder.
320       */
321     public Builder clearName() {
322       name = null;
323       fieldSetFlags()[0] = false;
324       return this;
325     }
326 
327     /**
328       * Gets the value of the 'id' field.
329       * @return The value.
330       */
331     public Integer getId() {
332       return id;
333     }
334 
335     /**
336       * Sets the value of the 'id' field.
337       * @param value The value of 'id'.
338       * @return This builder.
339       */
340     public Builder setId(int value) {
341       validate(fields()[1], value);
342       this.id = value;
343       fieldSetFlags()[1] = true;
344       return this;
345     }
346 
347     /**
348       * Checks whether the 'id' field has been set.
349       * @return True if the 'id' field has been set, false otherwise.
350       */
351     public boolean hasId() {
352       return fieldSetFlags()[1];
353     }
354 
355 
356     /**
357       * Clears the value of the 'id' field.
358       * @return This builder.
359       */
360     public Builder clearId() {
361       fieldSetFlags()[1] = false;
362       return this;
363     }
364 
365     /**
366       * Gets the value of the 'salary' field.
367       * @return The value.
368       */
369     public Integer getSalary() {
370       return salary;
371     }
372 
373     /**
374       * Sets the value of the 'salary' field.
375       * @param value The value of 'salary'.
376       * @return This builder.
377       */
378     public Builder setSalary(int value) {
379       validate(fields()[2], value);
380       this.salary = value;
381       fieldSetFlags()[2] = true;
382       return this;
383     }
384 
385     /**
386       * Checks whether the 'salary' field has been set.
387       * @return True if the 'salary' field has been set, false otherwise.
388       */
389     public boolean hasSalary() {
390       return fieldSetFlags()[2];
391     }
392 
393 
394     /**
395       * Clears the value of the 'salary' field.
396       * @return This builder.
397       */
398     public Builder clearSalary() {
399       fieldSetFlags()[2] = false;
400       return this;
401     }
402 
403     /**
404       * Gets the value of the 'age' field.
405       * @return The value.
406       */
407     public Integer getAge() {
408       return age;
409     }
410 
411     /**
412       * Sets the value of the 'age' field.
413       * @param value The value of 'age'.
414       * @return This builder.
415       */
416     public Builder setAge(int value) {
417       validate(fields()[3], value);
418       this.age = value;
419       fieldSetFlags()[3] = true;
420       return this;
421     }
422 
423     /**
424       * Checks whether the 'age' field has been set.
425       * @return True if the 'age' field has been set, false otherwise.
426       */
427     public boolean hasAge() {
428       return fieldSetFlags()[3];
429     }
430 
431 
432     /**
433       * Clears the value of the 'age' field.
434       * @return This builder.
435       */
436     public Builder clearAge() {
437       fieldSetFlags()[3] = false;
438       return this;
439     }
440 
441     /**
442       * Gets the value of the 'address' field.
443       * @return The value.
444       */
445     public CharSequence getAddress() {
446       return address;
447     }
448 
449     /**
450       * Sets the value of the 'address' field.
451       * @param value The value of 'address'.
452       * @return This builder.
453       */
454     public Builder setAddress(CharSequence value) {
455       validate(fields()[4], value);
456       this.address = value;
457       fieldSetFlags()[4] = true;
458       return this;
459     }
460 
461     /**
462       * Checks whether the 'address' field has been set.
463       * @return True if the 'address' field has been set, false otherwise.
464       */
465     public boolean hasAddress() {
466       return fieldSetFlags()[4];
467     }
468 
469 
470     /**
471       * Clears the value of the 'address' field.
472       * @return This builder.
473       */
474     public Builder clearAddress() {
475       address = null;
476       fieldSetFlags()[4] = false;
477       return this;
478     }
479 
480 
481     @SuppressWarnings("unchecked")
482     public Emp build() {
483       try {
484         Emp record = new Emp();
485         record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);
486         record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);
487         record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);
488         record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);
489         record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);
490         return record;
491       } catch (Exception e) {
492         throw new org.apache.avro.AvroRuntimeException(e);
493       }
494     }
495   }
496 
497   @SuppressWarnings("unchecked")
498   private static final org.apache.avro.io.DatumWriter<Emp>
499     WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$);
500 
501   @Override public void writeExternal(java.io.ObjectOutput out)
502     throws java.io.IOException {
503     WRITER$.write(this, SpecificData.getEncoder(out));
504   }
505 
506   @SuppressWarnings("unchecked")
507   private static final org.apache.avro.io.DatumReader<Emp>
508     READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$);
509 
510   @Override public void readExternal(java.io.ObjectInput in)
511     throws java.io.IOException {
512     READER$.read(this, SpecificData.getDecoder(in));
513   }
514 
515 }
Emp.java 文件内容
 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import java.io.Serializable;
 9 
10 public class Emp2 implements Serializable {
11     private int id;
12     private String name;
13     private int age;
14     private String address;
15     private int salary;
16 
17     public int getId() {
18         return id;
19     }
20     public void setId(int id) {
21         this.id = id;
22     }
23     public String getName() {
24         return name;
25     }
26     public void setName(String name) {
27         this.name = name;
28     }
29     public int getAge() {
30         return age;
31     }
32     public void setAge(int age) {
33         this.age = age;
34     }
35     public String getAddress() {
36         return address;
37     }
38     public void setAddress(String address) {
39         this.address = address;
40     }
41     public int getSalary() {
42         return salary;
43     }
44     public void setSalary(int salary) {
45         this.salary = salary;
46     }
47 }
Emp2.java 文件内容
 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import org.apache.hadoop.io.Writable;
 9 import tutorialspoint.com.Emp;
10 
11 import java.io.DataInput;
12 import java.io.DataOutput;
13 import java.io.IOException;
14 
15 public class EmpWritable implements Writable {
16     private Emp emp = new Emp();
17 
18     public Emp getEmp() {
19         return emp;
20     }
21 
22     public void setEmp(Emp emp) {
23         this.emp = emp;
24 
25     }
26 
27 
28     public EmpWritable() {
29     }
30 
31     //串行化
32     public void write(DataOutput dataOutput) throws IOException {
33         dataOutput.writeUTF((String)emp.getName());
34         dataOutput.writeInt(emp.getId());
35         dataOutput.writeInt(emp.getSalary());
36         dataOutput.writeInt(emp.getAge());
37         dataOutput.writeUTF((String)emp.getAddress());
38     }
39 
40     //发串行化
41     public void readFields(DataInput dataInput) throws IOException {
42         emp.setName((CharSequence)dataInput.readUTF());
43         emp.setId(dataInput.readInt());
44         emp.setSalary(dataInput.readInt());
45         emp.setAge(dataInput.readInt());
46         emp.setAddress((CharSequence)dataInput.readUTF());
47     }
48 }
EmpWritable.java 文件内容
 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import org.apache.avro.file.DataFileReader;
 9 import org.apache.avro.io.DatumReader;
10 import org.apache.avro.specific.SpecificDatumReader;
11 import tutorialspoint.com.Emp;
12 
13 import java.io.*;
14 
15 public class test {
16     private static  final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest");
17     private static final File java = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.javaTest");
18     private static final File hadoop = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.hadoopTest");
19 
20     public static void main(String[] args) throws Exception {
21         AvroDeserial();
22         JavaDeserial();
23         HadoopDeserial();
24     }
25 
26     //测试hadoop的反序列化方式
27     public static void HadoopDeserial() throws IOException {
28         long start = System.currentTimeMillis();
29         EmpWritable ew = new EmpWritable();
30         FileInputStream fis = new FileInputStream(hadoop);
31         DataInputStream dis = new DataInputStream(fis);
32 
33         for (int i = 0;i <= 10000000;i++){
34             ew.readFields(dis);
35 //            Emp emp = ew.getEmp();
36 //            System.out.println(emp);
37         }
38         fis.close();
39         dis.close();
40         System.out.printf("这是Hadoop反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",hadoop.length(),System.currentTimeMillis() - start);
41     }
42 
43 
44     //测试Avro反序列化
45     public static void AvroDeserial() throws IOException {
46         long start = System.currentTimeMillis();
47         //初始化reader对象
48         DatumReader<Emp> dr = new SpecificDatumReader<Emp>(Emp.class);
49         //初始化文件阅读器
50         DataFileReader<Emp> dfr = new DataFileReader<Emp>(avro,dr);
51         //遍历阅读器里面的内容
52         while (dfr.hasNext()){
53             Emp emp = dfr.next();
54             Integer id = emp.getId();
55             CharSequence name = emp.getName();
56             Integer age = emp.getAge();
57             Integer salary = emp.getSalary();
58             CharSequence address = emp.getAddress();
59 //            System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" + address);       不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。
60         }
61         System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
62     }
63 
64     //测试Java反序列化
65     public static void JavaDeserial() throws Exception {
66         long start = System.currentTimeMillis();
67 
68         FileInputStream fis = new FileInputStream(java);
69         ObjectInputStream ois = new ObjectInputStream(fis);
70         for (int i = 0;i <= 10000000;i++){
71             Emp2 emp = (Emp2) ois.readObject();
72         }
73         fis.close();
74         ois.close();
75         System.out.printf("这是Java反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",java.length(),(System.currentTimeMillis()-start));
76     }
77 }
78 
79 
80 /*
81 以上代码执行结果如下:
82 这是Avro反序列化方式: 反序列化文件大小:[220072462],用时:[3479]
83 这是Java反序列化方式: 反序列化文件大小:[50000139],用时:[15677]
84 这是Hadoop反序列化方式: 反序列化文件大小:[250000025],用时:[134628]
85  */

  经测试,将一个对象进行1000万次序列化到一个文件后,在反序列化回来,用时最短的依然是Avro,Java用时次之,Hadoop依然用时最长。

 

四.avro通过不生成代码,直接使用schema的方式对对象进行串行化

   通过上面的编程发现如果想要通过Avro进行序列化很繁琐,在序列化之前需要准备环境,需要通过编译工具生成代码,然后在导入IDE中,进行编程,那有么有不生成代码就可以进行序列化操作的呢?答案是肯定的,大致流程如下图:

   实现代码如下:

 1 /*
 2 @author :yinzhengjie
 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
 4 EMAIL:y1053419035@qq.com
 5 */
 6 package cn.org.yinzhengjie.avro;
 7 
 8 import org.apache.avro.Schema;
 9 import org.apache.avro.file.DataFileReader;
10 import org.apache.avro.file.DataFileWriter;
11 import org.apache.avro.generic.GenericData;
12 import org.apache.avro.generic.GenericRecord;
13 import org.apache.avro.io.DatumReader;
14 import org.apache.avro.io.DatumWriter;
15 import org.apache.avro.specific.SpecificDatumReader;
16 import org.apache.avro.specific.SpecificDatumWriter;
17 
18 import java.io.File;
19 import java.io.IOException;
20 
21 public class avroSchema {
22     private static final File  avscFile = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avsc");
23     private static  final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\schema.avro");
24     public static void main(String[] args) throws IOException {
25         AvroSerial();
26         AvroDeserial();
27     }
28 
29     //通过Schema的方式序列化
30     private static void AvroSerial() throws IOException {
31         long start = System.currentTimeMillis();
32         //通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径
33         Schema schema = new Schema.Parser().parse(avscFile);
34         //将schema变成了类似与map的对象
35         GenericRecord yinzhengjie = new GenericData.Record(schema);
36         yinzhengjie.put("id",1);
37         yinzhengjie.put("name","尹正杰");
38         yinzhengjie.put("age",18);
39         yinzhengjie.put("salary",80000);
40         yinzhengjie.put("address","北京");
41         //初始化writer对象,注意,泛型应写为:GenericRecord
42         DatumWriter<GenericRecord> dw = new SpecificDatumWriter<GenericRecord>(schema);
43         //初始化文件写入器,注意,泛型应写为:GenericRecord
44         DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(dw);
45         //定义写入的路径
46         dfw.create(schema,avro);
47         for (int i = 0;i <= 10000000;i++){
48             dfw.append(yinzhengjie);
49         }
50         System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
51     }
52 
53     //通过Schema的方式序列化
54     private static void AvroDeserial() throws IOException {
55         long start = System.currentTimeMillis();
56         //通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径
57         Schema schema = new Schema.Parser().parse(avscFile);
58         //将schema变成了类似与map的对象
59         GenericRecord yinzhengjie = new GenericData.Record(schema);
60 
61         //初始化Reader对象,注意,泛型应写为:GenericRecord
62         DatumReader<GenericRecord> dr = new SpecificDatumReader<GenericRecord>(schema);
63         //初始化文件阅读器,注意,泛型应写为:GenericRecord
64         DataFileReader<GenericRecord> dfr = new DataFileReader<GenericRecord>(avro,dr);
65         //遍历
66         while (dfr.hasNext()){
67             GenericRecord record = dfr.next();
68 //            System.out.println(record);
69         }
70         System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);
71     }
72 }
73 
74 /*
75 以上代码执行结果如下:
76 这是Avro序列化方式: 生成文件大小:[220045139],用时:[2408]
77 这是Avro反序列化方式: 反序列化文件大小:[220045139],用时:[2614]
78  */

 

posted @ 2018-06-02 23:39  尹正杰  阅读(1220)  评论(0编辑  收藏  举报