You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by su...@apache.org on 2017/05/31 15:48:44 UTC
svn commit: r1797063 [5/5] - in /avro/site/publish/docs/1.8.2: ./ examples/ examples/java-example/ examples/java-example/src/ examples/java-example/src/main/ examples/java-example/src/main/java/ examples/java-example/src/main/java/example/ examples/mr-...

Added: avro/site/publish/docs/1.8.2/spec.html
URL: http://svn.apache.org/viewvc/avro/site/publish/docs/1.8.2/spec.html?rev=1797063&view=auto
==============================================================================
--- avro/site/publish/docs/1.8.2/spec.html (added)
+++ avro/site/publish/docs/1.8.2/spec.html Wed May 31 15:48:43 2017
@@ -0,0 +1,2052 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Apache Avro&trade; 1.8.2
+ Specification</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://avro.apache.org/">Avro</a> &gt; <a href="http://avro.apache.org/">Avro</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://www.apache.org/"><img class="logoImage" alt="Apache" src="images/apache_feather.gif" title="The Apache Software Foundation"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://avro.apache.org/"><img class="logoImage" alt="Avro" src="images/avro-logo.png" title="Serialization System"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="avro.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://avro.apache.org/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop/Avro/">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Avro 1.8.2  Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="gettingstartedjava.html">Getting started (Java)</a>
+</div>
+<div class="menuitem">
+<a href="gettingstartedpython.html">Getting started (Python)</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Specification</div>
+</div>
+<div class="menuitem">
+<a href="trevni/spec.html">Trevni</a>
+</div>
+<div class="menuitem">
+<a href="api/java/index.html">Java API</a>
+</div>
+<div class="menuitem">
+<a href="api/c/index.html">C API</a>
+</div>
+<div class="menuitem">
+<a href="api/cpp/html/index.html">C++ API</a>
+</div>
+<div class="menuitem">
+<a href="api/csharp/index.html">C# API</a>
+</div>
+<div class="menuitem">
+<a href="mr.html">MapReduce guide</a>
+</div>
+<div class="menuitem">
+<a href="idl.html">IDL language</a>
+</div>
+<div class="menuitem">
+<a href="sasl.html">SASL profile</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/Avro/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/Avro/FAQ">FAQ</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="spec.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Apache Avro&trade; 1.8.2
+ Specification</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#preamble">Introduction</a>
+</li>
+<li>
+<a href="#schemas">Schema Declaration</a>
+<ul class="minitoc">
+<li>
+<a href="#schema_primitive">Primitive Types</a>
+</li>
+<li>
+<a href="#schema_complex">Complex Types</a>
+<ul class="minitoc">
+<li>
+<a href="#schema_record">Records</a>
+</li>
+<li>
+<a href="#Enums">Enums</a>
+</li>
+<li>
+<a href="#Arrays">Arrays</a>
+</li>
+<li>
+<a href="#Maps">Maps</a>
+</li>
+<li>
+<a href="#Unions">Unions</a>
+</li>
+<li>
+<a href="#Fixed">Fixed</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#names">Names</a>
+</li>
+<li>
+<a href="#Aliases">Aliases</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Data+Serialization">Data Serialization</a>
+<ul class="minitoc">
+<li>
+<a href="#Encodings">Encodings</a>
+</li>
+<li>
+<a href="#binary_encoding">Binary Encoding</a>
+<ul class="minitoc">
+<li>
+<a href="#binary_encode_primitive">Primitive Types</a>
+</li>
+<li>
+<a href="#binary_encode_complex">Complex Types</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#json_encoding">JSON Encoding</a>
+</li>
+<li>
+<a href="#single_object_encoding">Single-object encoding</a>
+<ul class="minitoc">
+<li>
+<a href="#single_object_encoding_spec">Single object encoding specification</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>
+<a href="#order">Sort Order</a>
+</li>
+<li>
+<a href="#Object+Container+Files">Object Container Files</a>
+<ul class="minitoc">
+<li>
+<a href="#Required+Codecs">Required Codecs</a>
+<ul class="minitoc">
+<li>
+<a href="#null">null</a>
+</li>
+<li>
+<a href="#deflate">deflate</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Optional+Codecs">Optional Codecs</a>
+<ul class="minitoc">
+<li>
+<a href="#snappy">snappy</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Protocol+Declaration">Protocol Declaration</a>
+<ul class="minitoc">
+<li>
+<a href="#Messages">Messages</a>
+</li>
+<li>
+<a href="#Sample+Protocol">Sample Protocol</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Protocol+Wire+Format">Protocol Wire Format</a>
+<ul class="minitoc">
+<li>
+<a href="#Message+Transport">Message Transport</a>
+<ul class="minitoc">
+<li>
+<a href="#HTTP+as+Transport">HTTP as Transport</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Message+Framing">Message Framing</a>
+</li>
+<li>
+<a href="#handshake">Handshake</a>
+</li>
+<li>
+<a href="#Call+Format">Call Format</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Schema+Resolution">Schema Resolution</a>
+</li>
+<li>
+<a href="#Parsing+Canonical+Form+for+Schemas">Parsing Canonical Form for Schemas</a>
+<ul class="minitoc">
+<li>
+<a href="#Transforming+into+Parsing+Canonical+Form">Transforming into Parsing Canonical Form</a>
+</li>
+<li>
+<a href="#schema_fingerprints">Schema Fingerprints</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Logical+Types">Logical Types</a>
+<ul class="minitoc">
+<li>
+<a href="#Decimal">Decimal</a>
+</li>
+<li>
+<a href="#Date">Date</a>
+</li>
+<li>
+<a href="#Time+%28millisecond+precision%29">Time (millisecond precision)</a>
+</li>
+<li>
+<a href="#Time+%28microsecond+precision%29">Time (microsecond precision)</a>
+</li>
+<li>
+<a href="#Timestamp+%28millisecond+precision%29">Timestamp (millisecond precision)</a>
+</li>
+<li>
+<a href="#Timestamp+%28microsecond+precision%29">Timestamp (microsecond precision)</a>
+</li>
+<li>
+<a href="#Duration">Duration</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+
+    
+<a name="preamble"></a>
+<h2 class="h3">Introduction</h2>
+<div class="section">
+<p>This document defines Apache Avro.  It is intended to be the
+        authoritative specification. Implementations of Avro must
+        adhere to this document.
+      </p>
+</div>
+
+    
+<a name="schemas"></a>
+<h2 class="h3">Schema Declaration</h2>
+<div class="section">
+<p>A Schema is represented in <a href="http://www.json.org/">JSON</a> by one of:</p>
+<ul>
+        
+<li>A JSON string, naming a defined type.</li>
+
+        
+<li>A JSON object, of the form:
+
+          <pre class="code">{"type": "<em>typeName</em>" ...<em>attributes</em>...}</pre>
+
+          where <em>typeName</em> is either a primitive or derived
+          type name, as defined below.  Attributes not defined in this
+          document are permitted as metadata, but must not affect
+          the format of serialized data.
+          </li>
+        
+<li>A JSON array, representing a union of embedded types.</li>
+      
+</ul>
+<a name="schema_primitive"></a>
+<h3 class="h4">Primitive Types</h3>
+<p>The set of primitive type names is:</p>
+<ul>
+          
+<li>
+<span class="codefrag">null</span>: no value</li>
+          
+<li>
+<span class="codefrag">boolean</span>: a binary value</li>
+          
+<li>
+<span class="codefrag">int</span>: 32-bit signed integer</li>
+          
+<li>
+<span class="codefrag">long</span>: 64-bit signed integer</li>
+          
+<li>
+<span class="codefrag">float</span>: single precision (32-bit) IEEE 754 floating-point number</li>
+          
+<li>
+<span class="codefrag">double</span>: double precision (64-bit) IEEE 754 floating-point number</li>
+          
+<li>
+<span class="codefrag">bytes</span>: sequence of 8-bit unsigned bytes</li>
+          
+<li>
+<span class="codefrag">string</span>: unicode character sequence</li>
+        
+</ul>
+<p>Primitive types have no specified attributes.</p>
+<p>Primitive type names are also defined type names.  Thus, for
+          example, the schema "string" is equivalent to:</p>
+<pre class="code">{"type": "string"}</pre>
+<a name="schema_complex"></a>
+<h3 class="h4">Complex Types</h3>
+<p>Avro supports six kinds of complex types: records, enums,
+        arrays, maps, unions and fixed.</p>
+<a name="schema_record"></a>
+<h4>Records</h4>
+<p>Records use the type name "record" and support the following attributes:</p>
+<ul>
+	    
+<li>
+<span class="codefrag">name</span>: a JSON string providing the name
+	    of the record (required).</li>
+	    
+<li>
+<em>namespace</em>, a JSON string that qualifies the name;</li>
+	    
+<li>
+<span class="codefrag">doc</span>: a JSON string providing documentation to the
+	    user of this schema (optional).</li>
+	    
+<li>
+<span class="codefrag">aliases:</span> a JSON array of strings, providing
+	      alternate names for this record (optional).</li>
+	    
+<li>
+<span class="codefrag">fields</span>: a JSON array, listing fields (required).
+	    Each field is a JSON object with the following attributes:
+	      <ul>
+		
+<li>
+<span class="codefrag">name</span>: a JSON string providing the name
+		  of the field (required).</li>
+		
+<li>
+<span class="codefrag">doc</span>: a JSON string describing this field
+                  for users (optional).</li>
+		
+<li>
+<span class="codefrag">type:</span> A JSON object defining a schema, or
+		  a JSON string naming a record definition
+		  (required).</li>
+		
+<li>
+<span class="codefrag">default:</span> A default value for this
+		  field, used when reading instances that lack this
+		  field (optional).  Permitted values depend on the
+		  field's schema type, according to the table below.
+		  Default values for union fields correspond to the
+		  first schema in the union. Default values for bytes
+		  and fixed fields are JSON strings, where Unicode
+		  code points 0-255 are mapped to unsigned 8-bit byte
+		  values 0-255.
+		  <table class="right">
+		    
+<caption>field default values</caption>
+		    
+<tr>
+<th colspan="1" rowspan="1">avro type</th><th colspan="1" rowspan="1">json type</th><th colspan="1" rowspan="1">example</th>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">null</td><td colspan="1" rowspan="1">null</td><td colspan="1" rowspan="1">null</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">boolean</td><td colspan="1" rowspan="1">boolean</td><td colspan="1" rowspan="1">true</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">int,long</td><td colspan="1" rowspan="1">integer</td><td colspan="1" rowspan="1">1</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">float,double</td><td colspan="1" rowspan="1">number</td><td colspan="1" rowspan="1">1.1</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">bytes</td><td colspan="1" rowspan="1">string</td><td colspan="1" rowspan="1">"\u00FF"</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">string</td><td colspan="1" rowspan="1">string</td><td colspan="1" rowspan="1">"foo"</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">record</td><td colspan="1" rowspan="1">object</td><td colspan="1" rowspan="1">{"a": 1}</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">enum</td><td colspan="1" rowspan="1">string</td><td colspan="1" rowspan="1">"FOO"</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">array</td><td colspan="1" rowspan="1">array</td><td colspan="1" rowspan="1">[1]</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">map</td><td colspan="1" rowspan="1">object</td><td colspan="1" rowspan="1">{"a": 1}</td>
+</tr>
+		    
+<tr>
+<td colspan="1" rowspan="1">fixed</td><td colspan="1" rowspan="1">string</td><td colspan="1" rowspan="1">"\u00ff"</td>
+</tr>
+		  
+</table>
+		
+</li>
+		
+<li>
+<span class="codefrag">order:</span> specifies how this field
+		  impacts sort ordering of this record (optional).
+		  Valid values are "ascending" (the default),
+		  "descending", or "ignore".  For more details on how
+		  this is used, see the <a href="#order">sort
+		  order</a> section below.</li>
+		
+<li>
+<span class="codefrag">aliases:</span> a JSON array of strings, providing
+		  alternate names for this field (optional).</li>
+	      
+</ul>
+	    
+</li>
+	  
+</ul>
+<p>For example, a linked-list of 64-bit values may be defined with:</p>
+<pre class="code">
+{
+  "type": "record",
+  "name": "LongList",
+  "aliases": ["LinkedLongs"],                      // old name for this
+  "fields" : [
+    {"name": "value", "type": "long"},             // each element has a long
+    {"name": "next", "type": ["null", "LongList"]} // optional next element
+  ]
+}
+	  </pre>
+<a name="Enums"></a>
+<h4>Enums</h4>
+<p>Enums use the type name "enum" and support the following
+	  attributes:</p>
+<ul>
+	    
+<li>
+<span class="codefrag">name</span>: a JSON string providing the name
+	    of the enum (required).</li>
+	    
+<li>
+<em>namespace</em>, a JSON string that qualifies the name;</li>
+	    
+<li>
+<span class="codefrag">aliases:</span> a JSON array of strings, providing
+	      alternate names for this enum (optional).</li>
+	    
+<li>
+<span class="codefrag">doc</span>: a JSON string providing documentation to the
+	    user of this schema (optional).</li>
+	    
+<li>
+<span class="codefrag">symbols</span>: a JSON array, listing symbols,
+	    as JSON strings (required).  All symbols in an enum must
+	    be unique; duplicates are prohibited.  Every symbol must
+	    match the regular expression <span class="codefrag">[A-Za-z_][A-Za-z0-9_]*</span>
+	    (the same requirement as for <a href="#names">names</a>).</li>
+	  
+</ul>
+<p>For example, playing card suits might be defined with:</p>
+<pre class="code">
+{ "type": "enum",
+  "name": "Suit",
+  "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
+}
+	  </pre>
+<a name="Arrays"></a>
+<h4>Arrays</h4>
+<p>Arrays use the type name <span class="codefrag">"array"</span> and support
+          a single attribute:</p>
+<ul>
+            
+<li>
+<span class="codefrag">items</span>: the schema of the array's items.</li>
+	  
+</ul>
+<p>For example, an array of strings is declared
+	  with:</p>
+<pre class="code">{"type": "array", "items": "string"}</pre>
+<a name="Maps"></a>
+<h4>Maps</h4>
+<p>Maps use the type name <span class="codefrag">"map"</span> and support
+          one attribute:</p>
+<ul>
+            
+<li>
+<span class="codefrag">values</span>: the schema of the map's values.</li>
+	  
+</ul>
+<p>Map keys are assumed to be strings.</p>
+<p>For example, a map from string to long is declared
+	  with:</p>
+<pre class="code">{"type": "map", "values": "long"}</pre>
+<a name="Unions"></a>
+<h4>Unions</h4>
+<p>Unions, as mentioned above, are represented using JSON
+          arrays.  For example, <span class="codefrag">["null", "string"]</span>
+          declares a schema which may be either a null or string.</p>
+<p>(Note that when a <a href="#schema_record">default
+          value</a> is specified for a record field whose type is a
+          union, the type of the default value must match the
+          <em>first</em> element of the union.  Thus, for unions
+          containing "null", the "null" is usually listed first, since
+          the default value of such unions is typically null.)</p>
+<p>Unions may not contain more than one schema with the same
+	  type, except for the named types record, fixed and enum.  For
+	  example, unions containing two array types or two map types
+	  are not permitted, but two types with different names are
+	  permitted.  (Names permit efficient resolution when reading
+	  and writing unions.)</p>
+<p>Unions may not immediately contain other unions.</p>
+<a name="Fixed"></a>
+<h4>Fixed</h4>
+<p>Fixed uses the type name <span class="codefrag">"fixed"</span> and supports
+          the following attributes:</p>
+<ul>
+	    
+<li>
+<span class="codefrag">name</span>: a string naming this fixed (required).</li>
+	    
+<li>
+<em>namespace</em>, a string that qualifies the name;</li>
+	    
+<li>
+<span class="codefrag">aliases:</span> a JSON array of strings, providing
+	      alternate names for this fixed (optional).</li>
+            
+<li>
+<span class="codefrag">size</span>: an integer, specifying the number
+            of bytes per value (required).</li>
+	  
+</ul>
+<p>For example, a 16-byte quantity may be declared with:</p>
+<pre class="code">{"type": "fixed", "size": 16, "name": "md5"}</pre>
+<a name="names"></a>
+<h3 class="h4">Names</h3>
+<p>Records, enums and fixed are named types.  Each has
+          a <em>fullname</em> that is composed of two parts;
+          a <em>name</em> and a <em>namespace</em>.  Equality of names
+          is defined on the fullname.</p>
+<p>The name portion of a fullname, record field names, and
+	  enum symbols must:</p>
+<ul>
+          
+<li>start with <span class="codefrag">[A-Za-z_]</span>
+</li>
+          
+<li>subsequently contain only <span class="codefrag">[A-Za-z0-9_]</span>
+</li>
+	
+</ul>
+<p>A namespace is a dot-separated sequence of such names.
+        The empty string may also be used as a namespace to indicate the
+        null namespace.
+        Equality of names (including field names and enum symbols)
+        as well as fullnames is case-sensitive.</p>
+<p>In record, enum and fixed definitions, the fullname is
+        determined in one of the following ways:</p>
+<ul>
+	  
+<li>A name and namespace are both specified.  For example,
+	  one might use <span class="codefrag">"name": "X", "namespace":
+	  "org.foo"</span> to indicate the
+	  fullname <span class="codefrag">org.foo.X</span>.</li>
+	  
+<li>A fullname is specified.  If the name specified contains
+	  a dot, then it is assumed to be a fullname, and any
+	  namespace also specified is ignored.  For example,
+	  use <span class="codefrag">"name": "org.foo.X"</span> to indicate the
+	  fullname <span class="codefrag">org.foo.X</span>.</li>
+	  
+<li>A name only is specified, i.e., a name that contains no
+	  dots.  In this case the namespace is taken from the most
+	  tightly enclosing schema or protocol.  For example,
+	  if <span class="codefrag">"name": "X"</span> is specified, and this occurs
+	  within a field of the record definition
+	  of <span class="codefrag">org.foo.Y</span>, then the fullname
+	  is <span class="codefrag">org.foo.X</span>. If there is no enclosing
+	  namespace then the null namespace is used.</li>
+	
+</ul>
+<p>References to previously defined names are as in the latter
+	two cases above: if they contain a dot they are a fullname, if
+	they do not contain a dot, the namespace is the namespace of
+	the enclosing definition.</p>
+<p>Primitive type names have no namespace and their names may
+	not be defined in any namespace.</p>
+<p> A schema or protocol may not contain multiple definitions
+	of a fullname.  Further, a name must be defined before it is
+	used ("before" in the depth-first, left-to-right traversal of
+	the JSON parse tree, where the <span class="codefrag">types</span> attribute of
+	a protocol is always deemed to come "before" the
+	<span class="codefrag">messages</span> attribute.)
+	</p>
+<a name="Aliases"></a>
+<h3 class="h4">Aliases</h3>
+<p>Named types and fields may have aliases.  An implementation
+        may optionally use aliases to map a writer's schema to the
+        reader's.  This facilitates both schema evolution as well as
+        processing disparate datasets.</p>
+<p>Aliases function by re-writing the writer's schema using
+        aliases from the reader's schema.  For example, if the
+        writer's schema was named "Foo" and the reader's schema is
+        named "Bar" and has an alias of "Foo", then the implementation
+        would act as though "Foo" were named "Bar" when reading.
+        Similarly, if data was written as a record with a field named
+        "x" and is read as a record with a field named "y" with alias
+        "x", then the implementation would act as though "x" were
+        named "y" when reading.</p>
+<p>A type alias may be specified either as a fully
+        namespace-qualified name, or relative to the namespace of the name
+        it is an alias for.  For example, if a type named "a.b" has
+        aliases of "c" and "x.y", then the fully qualified names of
+        its aliases are "a.c" and "x.y".</p>
+</div> <!-- end schemas -->
+
+    
+<a name="Data+Serialization"></a>
+<h2 class="h3">Data Serialization</h2>
+<div class="section">
+<p>Avro data is always serialized with its schema.  Files that
+	store Avro data should always also include the schema for that
+	data in the same file.  Avro-based remote procedure call (RPC)
+	systems must also guarantee that remote recipients of data
+	have a copy of the schema used to write that data.</p>
+<p>Because the schema used to write data is always available
+	when the data is read, Avro data itself is not tagged with
+	type information.  The schema is required to parse data.</p>
+<p>In general, both serialization and deserialization proceed as
+      a depth-first, left-to-right traversal of the schema,
+      serializing primitive types as they are encountered.</p>
+<a name="Encodings"></a>
+<h3 class="h4">Encodings</h3>
+<p>Avro specifies two serialization encodings: binary and
+	  JSON.  Most applications will use the binary encoding, as it
+	  is smaller and faster.  But, for debugging and web-based
+	  applications, the JSON encoding may sometimes be
+	  appropriate.</p>
+<a name="binary_encoding"></a>
+<h3 class="h4">Binary Encoding</h3>
+<a name="binary_encode_primitive"></a>
+<h4>Primitive Types</h4>
+<p>Primitive types are encoded in binary as follows:</p>
+<ul>
+            
+<li>
+<span class="codefrag">null</span> is written as zero bytes.</li>
+            
+<li>a <span class="codefrag">boolean</span> is written as a single byte whose
+              value is either <span class="codefrag">0</span> (false) or <span class="codefrag">1</span>
+              (true).</li>
+            
+<li>
+<span class="codefrag">int</span> and <span class="codefrag">long</span> values are written
+              using <a href="http://lucene.apache.org/java/3_5_0/fileformats.html#VInt">variable-length</a>
+	      <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html#types">zig-zag</a> coding.  Some examples:
+	      <table class="right">
+		
+<tr>
+<th colspan="1" rowspan="1">value</th><th colspan="1" rowspan="1">hex</th>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag"> 0</span></td><td colspan="1" rowspan="1"><span class="codefrag">00</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-1</span></td><td colspan="1" rowspan="1"><span class="codefrag">01</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag"> 1</span></td><td colspan="1" rowspan="1"><span class="codefrag">02</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-2</span></td><td colspan="1" rowspan="1"><span class="codefrag">03</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag"> 2</span></td><td colspan="1" rowspan="1"><span class="codefrag">04</span></td>
+</tr>
+		
+<tr>
+<td colspan="2" rowspan="1"><span class="codefrag">...</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-64</span></td><td colspan="1" rowspan="1"><span class="codefrag">7f</span></td>
+</tr>
+		
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag"> 64</span></td><td colspan="1" rowspan="1"><span class="codefrag">&nbsp;80 01</span></td>
+</tr>
+		
+<tr>
+<td colspan="2" rowspan="1"><span class="codefrag">...</span></td>
+</tr>
+	      
+</table>
+	    
+</li>
+            
+<li>a <span class="codefrag">float</span> is written as 4 bytes. The float is
+              converted into a 32-bit integer using a method equivalent
+              to <a href="http://java.sun.com/javase/6/docs/api/java/lang/Float.html#floatToIntBits%28float%29">Java's floatToIntBits</a> and then encoded
+              in little-endian format.</li>
+            
+<li>a <span class="codefrag">double</span> is written as 8 bytes. The double
+              is converted into a 64-bit integer using a method equivalent
+              to <a href="http://java.sun.com/javase/6/docs/api/java/lang/Double.html#doubleToLongBits%28double%29">Java's
+		doubleToLongBits</a> and then encoded in little-endian
+              format.</li>
+            
+<li>
+<span class="codefrag">bytes</span> are encoded as
+              a <span class="codefrag">long</span> followed by that many bytes of data.
+            </li>
+            
+<li>a <span class="codefrag">string</span> is encoded as
+              a <span class="codefrag">long</span> followed by that many bytes of UTF-8
+              encoded character data.
+              <p>For example, the three-character string "foo" would
+              be encoded as the long value 3 (encoded as
+              hex <span class="codefrag">06</span>) followed by the UTF-8 encoding of
+              'f', 'o', and 'o' (the hex bytes <span class="codefrag">66 6f
+              6f</span>):
+              </p>
+              
+<pre class="code">06 66 6f 6f</pre>
+            
+</li>
+          
+</ul>
+<a name="binary_encode_complex"></a>
+<h4>Complex Types</h4>
+<p>Complex types are encoded in binary as follows:</p>
+<a name="record_encoding"></a>
+<h5>Records</h5>
+<p>A record is encoded by encoding the values of its
+	      fields in the order that they are declared.  In other
+	      words, a record is encoded as just the concatenation of
+	      the encodings of its fields.  Field values are encoded per
+	      their schema.</p>
+<p>For example, the record schema</p>
+<pre class="code">
+	      {
+	      "type": "record",
+	      "name": "test",
+	      "fields" : [
+	      {"name": "a", "type": "long"},
+	      {"name": "b", "type": "string"}
+	      ]
+	      }
+	    </pre>
+<p>An instance of this record whose <span class="codefrag">a</span> field has
+	      value 27 (encoded as hex <span class="codefrag">36</span>) and
+	      whose <span class="codefrag">b</span> field has value "foo" (encoded as hex
+	      bytes <span class="codefrag">06 66 6f 6f</span>), would be encoded simply
+	      as the concatenation of these, namely the hex byte
+	      sequence:</p>
+<pre class="code">36 06 66 6f 6f</pre>
+<a name="enum_encoding"></a>
+<h5>Enums</h5>
+<p>An enum is encoded by an <span class="codefrag">int</span>, representing
+              the zero-based position of the symbol in the schema.</p>
+<p>For example, consider the enum:</p>
+<pre class="code">
+	      {"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
+	    </pre>
+<p>This would be encoded by an <span class="codefrag">int</span> between
+	      zero and three, with zero indicating "A", and 3 indicating
+	      "D".</p>
+<a name="array_encoding"></a>
+<h5>Arrays</h5>
+<p>Arrays are encoded as a series of <em>blocks</em>.
+              Each block consists of a <span class="codefrag">long</span> <em>count</em>
+              value, followed by that many array items.  A block with
+              count zero indicates the end of the array.  Each item is
+              encoded per the array's item schema.</p>
+<p>If a block's count is negative, its absolute value is used,
+              and the count is followed immediately by a <span class="codefrag">long</span>
+              block <em>size</em> indicating the number of bytes in the
+              block.  This block size permits fast skipping through data,
+              e.g., when projecting a record to a subset of its fields.</p>
+<p>For example, the array schema</p>
+<pre class="code">{"type": "array", "items": "long"}</pre>
+<p>an array containing the items 3 and 27 could be encoded
+              as the long value 2 (encoded as hex 04) followed by long
+              values 3 and 27 (encoded as hex <span class="codefrag">06 36</span>)
+              terminated by zero:</p>
+<pre class="code">04 06 36 00</pre>
+<p>The blocked representation permits one to read and write
+              arrays larger than can be buffered in memory, since one can
+              start writing items without knowing the full length of the
+              array.</p>
+<a name="map_encoding"></a>
+<h5>Maps</h5>
+<p>Maps are encoded as a series of <em>blocks</em>.  Each
+              block consists of a <span class="codefrag">long</span> <em>count</em>
+              value, followed by that many key/value pairs.  A block
+              with count zero indicates the end of the map.  Each item
+              is encoded per the map's value schema.</p>
+<p>If a block's count is negative, its absolute value is used,
+              and the count is followed immediately by a <span class="codefrag">long</span>
+              block <em>size</em> indicating the number of bytes in the
+              block.  This block size permits fast skipping through data,
+              e.g., when projecting a record to a subset of its fields.</p>
+<p>The blocked representation permits one to read and write
+              maps larger than can be buffered in memory, since one can
+              start writing items without knowing the full length of the
+              map.</p>
+<a name="union_encoding"></a>
+<h5>Unions</h5>
+<p>A union is encoded by first writing a <span class="codefrag">long</span>
+              value indicating the zero-based position within the
+              union of the schema of its value.  The value is then
+              encoded per the indicated schema within the union.</p>
+<p>For example, the union
+              schema <span class="codefrag">["null","string"]</span> would encode:</p>
+<ul>
+              
+<li>
+<span class="codefrag">null</span> as zero (the index of "null" in the union):
+                <pre class="code">00</pre>
+</li>
+              
+<li>the string <span class="codefrag">"a"</span> as one (the index of
+                "string" in the union, encoded as hex <span class="codefrag">02</span>),
+                followed by the serialized string:
+                <pre class="code">02 02 61</pre>
+</li>
+            
+</ul>
+<a name="fixed_encoding"></a>
+<h5>Fixed</h5>
+<p>Fixed instances are encoded using the number of bytes
+              declared in the schema.</p>
+<a name="json_encoding"></a>
+<h3 class="h4">JSON Encoding</h3>
+<p>Except for unions, the JSON encoding is the same as is used
+        to encode <a href="#schema_record">field default
+        values</a>.</p>
+<p>The value of a union is encoded in JSON as follows:</p>
+<ul>
+          
+<li>if its type is <span class="codefrag">null</span>, then it is encoded as
+          a JSON null;</li>
+          
+<li>otherwise it is encoded as a JSON object with one
+          name/value pair whose name is the type's name and whose
+          value is the recursively encoded value.  For Avro's named
+          types (record, fixed or enum) the user-specified name is
+          used, for other types the type name is used.</li>
+        
+</ul>
+<p>For example, the union
+          schema <span class="codefrag">["null","string","Foo"]</span>, where Foo is a
+          record name, would encode:</p>
+<ul>
+          
+<li>
+<span class="codefrag">null</span> as <span class="codefrag">null</span>;</li>
+          
+<li>the string <span class="codefrag">"a"</span> as
+            <span class="codefrag">{"string": "a"}</span>; and</li>
+          
+<li>a Foo instance as <span class="codefrag">{"Foo": {...}}</span>,
+          where <span class="codefrag">{...}</span> indicates the JSON encoding of a
+          Foo instance.</li>
+        
+</ul>
+<p>Note that a schema is still required to correctly process
+        JSON-encoded data.  For example, the JSON encoding does not
+        distinguish between <span class="codefrag">int</span>
+        and <span class="codefrag">long</span>, <span class="codefrag">float</span>
+        and <span class="codefrag">double</span>, records and maps, enums and strings,
+        etc.</p>
+<a name="single_object_encoding"></a>
+<h3 class="h4">Single-object encoding</h3>
+<p>In some situations a single Avro serialized object is to be stored for a
+        longer period of time. One very common example is storing Avro records
+        for several weeks in an <a href="http://kafka.apache.org/">Apache Kafka</a> topic.</p>
+<p>In the period after a schema change this persistence system will contain records
+        that have been written with different schemas. So the need arises to know which schema
+        was used to write a record to support schema evolution correctly.
+        In most cases the schema itself is too large to include in the message,
+        so this binary wrapper format supports the use case more effectively.</p>
+<a name="single_object_encoding_spec"></a>
+<h4>Single object encoding specification</h4>
+<p>Single Avro objects are encoded as follows:</p>
+<ol>
+            
+<li>A two-byte marker, <span class="codefrag">C3 01</span>, to show that the message is Avro and uses this single-record format (version 1).</li>
+            
+<li>The 8-byte little-endian CRC-64-AVRO <a href="#schema_fingerprints">fingerprint</a> of the object's schema</li>
+            
+<li>The Avro object encoded using <a href="#binary_encoding">Avro's binary encoding</a>
+</li>
+          
+</ol>
+<p>Implementations use the 2-byte marker to determine whether a payload is Avro.
+          This check helps avoid expensive lookups that resolve the schema from a
+          fingerprint, when the message is not an encoded Avro payload.</p>
+</div>
+
+    
+<a name="order"></a>
+<h2 class="h3">Sort Order</h2>
+<div class="section">
+<p>Avro defines a standard sort order for data.  This permits
+        data written by one system to be efficiently sorted by another
+        system.  This can be an important optimization, as sort order
+        comparisons are sometimes the most frequent per-object
+        operation.  Note also that Avro binary-encoded data can be
+        efficiently ordered without deserializing it to objects.</p>
+<p>Data items may only be compared if they have identical
+        schemas.  Pairwise comparisons are implemented recursively
+        with a depth-first, left-to-right traversal of the schema.
+        The first mismatch encountered determines the order of the
+        items.</p>
+<p>Two items with the same schema are compared according to the
+        following rules.</p>
+<ul>
+        
+<li>
+<span class="codefrag">null</span> data is always equal.</li>
+        
+<li>
+<span class="codefrag">boolean</span> data is ordered with false before true.</li>
+        
+<li>
+<span class="codefrag">int</span>, <span class="codefrag">long</span>, <span class="codefrag">float</span>
+          and <span class="codefrag">double</span> data is ordered by ascending numeric
+          value.</li>
+        
+<li>
+<span class="codefrag">bytes</span> and <span class="codefrag">fixed</span> data are
+          compared lexicographically by unsigned 8-bit values.</li>
+        
+<li>
+<span class="codefrag">string</span> data is compared lexicographically by
+          Unicode code point.  Note that since UTF-8 is used as the
+          binary encoding for strings, sorting of bytes and string
+          binary data is identical.</li>
+        
+<li>
+<span class="codefrag">array</span> data is compared lexicographically by
+          element.</li>
+        
+<li>
+<span class="codefrag">enum</span> data is ordered by the symbol's position
+          in the enum schema.  For example, an enum whose symbols are
+          <span class="codefrag">["z", "a"]</span> would sort <span class="codefrag">"z"</span> values
+          before <span class="codefrag">"a"</span> values.</li>
+        
+<li>
+<span class="codefrag">union</span> data is first ordered by the branch
+          within the union, and, within that, by the type of the
+          branch.  For example, an <span class="codefrag">["int", "string"]</span>
+          union would order all int values before all string values,
+          with the ints and strings themselves ordered as defined
+          above.</li>
+        
+<li>
+<span class="codefrag">record</span> data is ordered lexicographically by
+          field.  If a field specifies that its order is:
+          <ul>
+            
+<li>
+<span class="codefrag">"ascending"</span>, then the order of its values
+              is unaltered.</li>
+            
+<li>
+<span class="codefrag">"descending"</span>, then the order of its values
+              is reversed.</li>
+            
+<li>
+<span class="codefrag">"ignore"</span>, then its values are ignored
+              when sorting.</li>
+          
+</ul>
+        
+</li>
+        
+<li>
+<span class="codefrag">map</span> data may not be compared.  It is an error
+          to attempt to compare data containing maps unless those maps
+          are in an <span class="codefrag">"order":"ignore"</span> record field.
+        </li>
+      
+</ul>
+</div>
+
+    
+<a name="Object+Container+Files"></a>
+<h2 class="h3">Object Container Files</h2>
+<div class="section">
+<p>Avro includes a simple object container file format.  A file
+      has a schema, and all objects stored in the file must be written
+      according to that schema, using binary encoding.  Objects are
+      stored in blocks that may be compressed.  Synchronization markers
+      are used between blocks to permit efficient splitting of files
+      for MapReduce processing.</p>
+<p>Files may include arbitrary user-specified metadata.</p>
+<p>A file consists of:</p>
+<ul>
+        
+<li>A <em>file header</em>, followed by</li>
+        
+<li>one or more <em>file data blocks</em>.</li>
+      
+</ul>
+<p>A file header consists of:</p>
+<ul>
+        
+<li>Four bytes, ASCII 'O', 'b', 'j', followed by 1.</li>
+        
+<li>
+<em>file metadata</em>, including the schema.</li>
+        
+<li>The 16-byte, randomly-generated sync marker for this file.</li>
+      
+</ul>
+<p>File metadata is written as if defined by the following <a href="#map_encoding">map</a> schema:</p>
+<pre class="code">{"type": "map", "values": "bytes"}</pre>
+<p>All metadata properties that start with "avro." are reserved.
+      The following file metadata properties are currently used:</p>
+<ul>
+        
+<li>
+<strong>avro.schema</strong> contains the schema of objects
+        stored in the file, as JSON data (required).</li>
+        
+<li>
+<strong>avro.codec</strong> the name of the compression codec
+        used to compress blocks, as a string.  Implementations
+        are required to support the following codecs: "null" and "deflate".
+        If codec is absent, it is assumed to be "null".  The codecs
+        are described with more detail below.</li>
+      
+</ul>
+<p>A file header is thus described by the following schema:</p>
+<pre class="code">
+{"type": "record", "name": "org.apache.avro.file.Header",
+ "fields" : [
+   {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}},
+   {"name": "meta", "type": {"type": "map", "values": "bytes"}},
+   {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}
+  ]
+}
+      </pre>
+<p>A file data block consists of:</p>
+<ul>
+        
+<li>A long indicating the count of objects in this block.</li>
+        
+<li>A long indicating the size in bytes of the serialized objects
+        in the current block, after any codec is applied.</li>
+        
+<li>The serialized objects.  If a codec is specified, this is
+        compressed by that codec.</li>
+        
+<li>The file's 16-byte sync marker.</li>
+      
+</ul>
+<p>Thus, each block's binary data can be efficiently extracted or skipped without
+          deserializing the contents.  The combination of block size, object counts, and
+          sync markers enables detection of corrupt blocks and helps ensure data integrity.</p>
+<a name="Required+Codecs"></a>
+<h3 class="h4">Required Codecs</h3>
+<a name="null"></a>
+<h4>null</h4>
+<p>The "null" codec simply passes through data uncompressed.</p>
+<a name="deflate"></a>
+<h4>deflate</h4>
+<p>The "deflate" codec writes the data block using the
+        deflate algorithm as specified in
+        <a href="http://www.isi.edu/in-notes/rfc1951.txt">RFC 1951</a>,
+        and typically implemented using the zlib library.  Note that this
+        format (unlike the "zlib format" in RFC 1950) does not have a
+        checksum.
+        </p>
+<a name="Optional+Codecs"></a>
+<h3 class="h4">Optional Codecs</h3>
+<a name="snappy"></a>
+<h4>snappy</h4>
+<p>The "snappy" codec uses
+            Google's <a href="http://code.google.com/p/snappy/">Snappy</a>
+            compression library.  Each compressed block is followed
+            by the 4-byte, big-endian CRC32 checksum of the
+            uncompressed data in the block.</p>
+</div>
+
+    
+<a name="Protocol+Declaration"></a>
+<h2 class="h3">Protocol Declaration</h2>
+<div class="section">
+<p>Avro protocols describe RPC interfaces.  Like schemas, they are
+      defined with JSON text.</p>
+<p>A protocol is a JSON object with the following attributes:</p>
+<ul>
+        
+<li>
+<em>protocol</em>, a string, the name of the protocol
+        (required);</li>
+        
+<li>
+<em>namespace</em>, an optional string that qualifies the name;</li>
+        
+<li>
+<em>doc</em>, an optional string describing this protocol;</li>
+        
+<li>
+<em>types</em>, an optional list of definitions of named types
+          (records, enums, fixed and errors).  An error definition is
+          just like a record definition except it uses "error" instead
+          of "record".  Note that forward references to named types
+          are not permitted.</li>
+        
+<li>
+<em>messages</em>, an optional JSON object whose keys are
+          message names and whose values are objects whose attributes
+          are described below.  No two messages may have the same
+          name.</li>
+      
+</ul>
+<p>The name and namespace qualification rules defined for schema objects
+	apply to protocols as well.</p>
+<a name="Messages"></a>
+<h3 class="h4">Messages</h3>
+<p>A message has attributes:</p>
+<ul>
+          
+<li>a <em>doc</em>, an optional description of the message,</li>
+          
+<li>a <em>request</em>, a list of named,
+            typed <em>parameter</em> schemas (this has the same form
+            as the fields of a record declaration);</li>
+          
+<li>a <em>response</em> schema; </li>
+          
+<li>an optional union of declared <em>error</em> schemas.
+	    The <em>effective</em> union has <span class="codefrag">"string"</span>
+	    prepended to the declared union, to permit transmission of
+	    undeclared "system" errors.  For example, if the declared
+	    error union is <span class="codefrag">["AccessError"]</span>, then the
+	    effective union is <span class="codefrag">["string", "AccessError"]</span>.
+	    When no errors are declared, the effective error union
+	    is <span class="codefrag">["string"]</span>.  Errors are serialized using
+	    the effective union; however, a protocol's JSON
+	    declaration contains only the declared union.
+	  </li>
+          
+<li>an optional <em>one-way</em> boolean parameter.</li>
+        
+</ul>
+<p>A request parameter list is processed equivalently to an
+          anonymous record.  Since record field lists may vary between
+          reader and writer, request parameters may also differ
+          between the caller and responder, and such differences are
+          resolved in the same manner as record field differences.</p>
+<p>The one-way parameter may only be true when the response type
+	  is <span class="codefrag">"null"</span> and no errors are listed.</p>
+<a name="Sample+Protocol"></a>
+<h3 class="h4">Sample Protocol</h3>
+<p>For example, one may define a simple HelloWorld protocol with:</p>
+<pre class="code">
+{
+  "namespace": "com.acme",
+  "protocol": "HelloWorld",
+  "doc": "Protocol Greetings",
+
+  "types": [
+    {"name": "Greeting", "type": "record", "fields": [
+      {"name": "message", "type": "string"}]},
+    {"name": "Curse", "type": "error", "fields": [
+      {"name": "message", "type": "string"}]}
+  ],
+
+  "messages": {
+    "hello": {
+      "doc": "Say hello.",
+      "request": [{"name": "greeting", "type": "Greeting" }],
+      "response": "Greeting",
+      "errors": ["Curse"]
+    }
+  }
+}
+        </pre>
+</div>
+
+    
+<a name="Protocol+Wire+Format"></a>
+<h2 class="h3">Protocol Wire Format</h2>
+<div class="section">
+<a name="Message+Transport"></a>
+<h3 class="h4">Message Transport</h3>
+<p>Messages may be transmitted via
+        different <em>transport</em> mechanisms.</p>
+<p>To the transport, a <em>message</em> is an opaque byte sequence.</p>
+<p>A transport is a system that supports:</p>
+<ul>
+          
+<li>
+<strong>transmission of request messages</strong>
+          
+</li>
+          
+<li>
+<strong>receipt of corresponding response messages</strong>
+            
+<p>Servers may send a response message back to the client
+            corresponding to a request message.  The mechanism of
+            correspondence is transport-specific.  For example, in
+            HTTP it is implicit, since HTTP directly supports requests
+            and responses.  But a transport that multiplexes many
+            client threads over a single socket would need to tag
+            messages with unique identifiers.</p>
+          
+</li>
+        
+</ul>
+<p>Transports may be either <em>stateless</em>
+        or <em>stateful</em>.  In a stateless transport, messaging
+        assumes no established connection state, while stateful
+        transports establish connections that may be used for multiple
+        messages.  This distinction is discussed further in
+        the <a href="#handshake">handshake</a> section below.</p>
+<a name="HTTP+as+Transport"></a>
+<h4>HTTP as Transport</h4>
+<p>When
+            <a href="http://www.w3.org/Protocols/rfc2616/rfc2616.html">HTTP</a>
+            is used as a transport, each Avro message exchange is an
+            HTTP request/response pair.  All messages of an Avro
+            protocol should share a single URL at an HTTP server.
+            Other protocols may also use that URL.  Both normal and
+            error Avro response messages should use the 200 (OK)
+            response code.  The chunked encoding may be used for
+            requests and responses, but, regardless, the Avro request
+            and response are the entire content of an HTTP request and
+            response.  The HTTP Content-Type of requests and responses
+            should be specified as "avro/binary".  Requests should be
+            made using the POST method.</p>
+<p>HTTP is used by Avro as a stateless transport.</p>
+<a name="Message+Framing"></a>
+<h3 class="h4">Message Framing</h3>
+<p>Avro messages are <em>framed</em> as a list of buffers.</p>
+<p>Framing is a layer between messages and the transport.
+        It exists to optimize certain operations.</p>
+<p>The format of framed message data is:</p>
+<ul>
+          
+<li>a series of <em>buffers</em>, where each buffer consists of:
+            <ul>
+              
+<li>a four-byte, big-endian <em>buffer length</em>, followed by</li>
+              
+<li>that many bytes of <em>buffer data</em>.</li>
+            
+</ul>
+          
+</li>
+          
+<li>A message is always terminated by a zero-length buffer.</li>
+        
+</ul>
+<p>Framing is transparent to request and response message
+        formats (described below).  Any message may be presented as a
+        single or multiple buffers.</p>
+<p>Framing can permit readers to more efficiently get
+        different buffers from different sources and for writers to
+        more efficiently store different buffers to different
+        destinations.  In particular, it can reduce the number of
+        times large binary objects are copied.  For example, if an RPC
+        parameter consists of a megabyte of file data, that data can
+        be copied directly to a socket from a file descriptor, and, on
+        the other end, it could be written directly to a file
+        descriptor, never entering user space.</p>
+<p>A simple, recommended, framing policy is for writers to
+        create a new segment whenever a single binary object is
+        written that is larger than a normal output buffer.  Small
+        objects are then appended in buffers, while larger objects are
+        written as their own buffers.  When a reader then tries to
+        read a large object the runtime can hand it an entire buffer
+        directly, without having to copy it.</p>
+<a name="handshake"></a>
+<h3 class="h4">Handshake</h3>
+<p>The purpose of the handshake is to ensure that the client
+        and the server have each other's protocol definition, so that
+        the client can correctly deserialize responses, and the server
+        can correctly deserialize requests.  Both clients and servers
+        should maintain a cache of recently seen protocols, so that,
+        in most cases, a handshake will be completed without extra
+        round-trip network exchanges or the transmission of full
+        protocol text.</p>
+<p>RPC requests and responses may not be processed until a
+        handshake has been completed.  With a stateless transport, all
+        requests and responses are prefixed by handshakes.  With a
+        stateful transport, handshakes are only attached to requests
+        and responses until a successful handshake response has been
+        returned over a connection.  After this, request and response
+        payloads are sent without handshakes for the lifetime of that
+        connection.</p>
+<p>The handshake process uses the following record schemas:</p>
+<pre class="code">
+{
+  "type": "record",
+  "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
+  "fields": [
+    {"name": "clientHash",
+     "type": {"type": "fixed", "name": "MD5", "size": 16}},
+    {"name": "clientProtocol", "type": ["null", "string"]},
+    {"name": "serverHash", "type": "MD5"},
+    {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
+  ]
+}
+{
+  "type": "record",
+  "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
+  "fields": [
+    {"name": "match",
+     "type": {"type": "enum", "name": "HandshakeMatch",
+              "symbols": ["BOTH", "CLIENT", "NONE"]}},
+    {"name": "serverProtocol",
+     "type": ["null", "string"]},
+    {"name": "serverHash",
+     "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
+    {"name": "meta",
+     "type": ["null", {"type": "map", "values": "bytes"}]}
+  ]
+}
+        </pre>
+<ul>
+          
+<li>A client first prefixes each request with
+          a <span class="codefrag">HandshakeRequest</span> containing just the hash of
+          its protocol and of the server's protocol
+          (<span class="codefrag">clientHash!=null, clientProtocol=null,
+          serverHash!=null</span>), where the hashes are 128-bit MD5
+          hashes of the JSON protocol text. If a client has never
+          connected to a given server, it sends its hash as a guess of
+          the server's hash, otherwise it sends the hash that it
+          previously obtained from this server.</li>
+
+          
+<li>The server responds with
+          a <span class="codefrag">HandshakeResponse</span> containing one of:
+            <ul>
+              
+<li>
+<span class="codefrag">match=BOTH, serverProtocol=null,
+              serverHash=null</span> if the client sent the valid hash
+              of the server's protocol and the server knows what
+              protocol corresponds to the client's hash. In this case,
+              the request is complete and the response data
+              immediately follows the HandshakeResponse.</li>
+
+              
+<li>
+<span class="codefrag">match=CLIENT, serverProtocol!=null,
+              serverHash!=null</span> if the server has previously
+              seen the client's protocol, but the client sent an
+              incorrect hash of the server's protocol. The request is
+              complete and the response data immediately follows the
+              HandshakeResponse. The client must use the returned
+              protocol to process the response and should also cache
+              that protocol and its hash for future interactions with
+              this server.</li>
+
+              
+<li>
+<span class="codefrag">match=NONE</span> if the server has not
+              previously seen the client's protocol.
+              The <span class="codefrag">serverHash</span>
+              and <span class="codefrag">serverProtocol</span> may also be non-null if
+              the server's protocol hash was incorrect.
+
+              <p>In this case the client must then re-submit its request
+              with its protocol text (<span class="codefrag">clientHash!=null,
+              clientProtocol!=null, serverHash!=null</span>) and the
+              server should respond with a successful match
+              (<span class="codefrag">match=BOTH, serverProtocol=null,
+              serverHash=null</span>) as above.</p>
+              
+</li>
+            
+</ul>
+          
+</li>
+        
+</ul>
+<p>The <span class="codefrag">meta</span> field is reserved for future
+        handshake enhancements.</p>
+<a name="Call+Format"></a>
+<h3 class="h4">Call Format</h3>
+<p>A <em>call</em> consists of a request message paired with
+        its resulting response or error message.  Requests and
+        responses contain extensible metadata, and both kinds of
+        messages are framed as described above.</p>
+<p>The format of a call request is:</p>
+<ul>
+          
+<li>
+<em>request metadata</em>, a map with values of
+          type <span class="codefrag">bytes</span>
+</li>
+          
+<li>the <em>message name</em>, an Avro string,
+          followed by</li>
+          
+<li>the message <em>parameters</em>.  Parameters are
+          serialized according to the message's request
+          declaration.</li>
+        
+</ul>
+<p>When the empty string is used as a message name a server
+        should ignore the parameters and return an empty response.  A
+        client may use this to ping a server or to perform a handshake
+        without sending a protocol message.</p>
+<p>When a message is declared one-way and a stateful
+        connection has been established by a successful handshake
+        response, no response data is sent.  Otherwise the format of
+        the call response is:</p>
+<ul>
+          
+<li>
+<em>response metadata</em>, a map with values of
+          type <span class="codefrag">bytes</span>
+</li>
+          
+<li>a one-byte <em>error flag</em> boolean, followed by either:
+            <ul>
+              
+<li>if the error flag is false, the message <em>response</em>,
+                serialized per the message's response schema.</li>
+              
+<li>if the error flag is true, the <em>error</em>,
+              serialized per the message's effective error union
+              schema.</li>
+            
+</ul>
+          
+</li>
+        
+</ul>
+</div>
+
+    
+<a name="Schema+Resolution"></a>
+<h2 class="h3">Schema Resolution</h2>
+<div class="section">
+<p>A reader of Avro data, whether from an RPC or a file, can
+        always parse that data because its schema is provided.  But
+        that schema may not be exactly the schema that was expected.
+        For example, if the data was written with a different version
+        of the software than it is read, then records may have had
+        fields added or removed.  This section specifies how such
+        schema differences should be resolved.</p>
+<p>We call the schema used to write the data
+        the <em>writer's</em> schema, and the schema that the
+        application expects the <em>reader's</em> schema.  Differences
+        between these should be resolved as follows:</p>
+<ul>
+        
+<li>
+<p>It is an error if the two schemas do not <em>match</em>.</p>
+          
+<p>To match, one of the following must hold:</p>
+          
+<ul>
+            
+<li>both schemas are arrays whose item types match</li>
+            
+<li>both schemas are maps whose value types match</li>
+            
+<li>both schemas are enums whose names match</li>
+            
+<li>both schemas are fixed whose sizes and names match</li>
+            
+<li>both schemas are records with the same name</li>
+            
+<li>either schema is a union</li>
+            
+<li>both schemas have the same primitive type</li>
+            
+<li>the writer's schema may be <em>promoted</em> to the
+              reader's as follows:
+              <ul>
+                
+<li>int is promotable to long, float, or double</li>
+                
+<li>long is promotable to float or double</li>
+                
+<li>float is promotable to double</li>
+                
+<li>string is promotable to bytes</li>
+                
+<li>bytes is promotable to string</li>
+                
+</ul>
+            
+</li>
+          
+</ul>
+        
+</li>
+
+        
+<li>
+<strong>if both are records:</strong>
+          
+<ul>
+            
+<li>the ordering of fields may be different: fields are
+              matched by name.</li>
+
+            
+<li>schemas for fields with the same name in both records
+              are resolved recursively.</li>
+
+            
+<li>if the writer's record contains a field with a name
+              not present in the reader's record, the writer's value
+              for that field is ignored.</li>
+
+            
+<li>if the reader's record schema has a field that
+              contains a default value, and writer's schema does not
+              have a field with the same name, then the reader should
+              use the default value from its field.</li>
+
+            
+<li>if the reader's record schema has a field with no
+              default value, and writer's schema does not have a field
+              with the same name, an error is signalled.</li>
+          
+</ul>
+        
+</li>
+
+        
+<li>
+<strong>if both are enums:</strong>
+          
+<p>if the writer's symbol is not present in the reader's
+            enum, then an error is signalled.</p>
+        
+</li>
+
+        
+<li>
+<strong>if both are arrays:</strong>
+          
+<p>This resolution algorithm is applied recursively to the reader's and
+            writer's array item schemas.</p>
+        
+</li>
+
+        
+<li>
+<strong>if both are maps:</strong>
+          
+<p>This resolution algorithm is applied recursively to the reader's and
+            writer's value schemas.</p>
+        
+</li>
+
+        
+<li>
+<strong>if both are unions:</strong>
+          
+<p>The first schema in the reader's union that matches the
+            selected writer's union schema is recursively resolved
+            against it.  If none match, an error is signalled.</p>
+        
+</li>
+
+        
+<li>
+<strong>if reader's is a union, but writer's is not</strong>
+          
+<p>The first schema in the reader's union that matches the
+            writer's schema is recursively resolved against it.  If none
+            match, an error is signalled.</p>
+        
+</li>
+
+        
+<li>
+<strong>if writer's is a union, but reader's is not</strong>
+          
+<p>If the reader's schema matches the selected writer's schema,
+            it is recursively resolved against it.  If they do not
+            match, an error is signalled.</p>
+        
+</li>
+
+      
+</ul>
+<p>A schema's "doc" fields are ignored for the purposes of schema resolution.  Hence,
+        the "doc" portion of a schema may be dropped at serialization.</p>
+</div>
+
+    
+<a name="Parsing+Canonical+Form+for+Schemas"></a>
+<h2 class="h3">Parsing Canonical Form for Schemas</h2>
+<div class="section">
+<p>One of the defining characteristics of Avro is that a reader
+      is assumed to have the "same" schema used by the writer of the
+      data the reader is reading.  This assumption leads to a data
+      format that's compact and also amenable to many forms of schema
+      evolution.  However, the specification so far has not defined
+      what it means for the reader to have the "same" schema as the
+      writer.  Does the schema need to be textually identical?  Well,
+      clearly adding or removing some whitespace to a JSON expression
+      does not change its meaning.  At the same time, reordering the
+      fields of records clearly <em>does</em> change the meaning.  So
+      what does it mean for a reader to have "the same" schema as a
+      writer?</p>
+<p>
+<em>Parsing Canonical Form</em> is a transformation of a
+      writer's schema that lets us define what it means for two
+      schemas to be "the same" for the purpose of reading data written
+      against the schema.  It is called <em>Parsing</em> Canonical Form
+      because the transformations strip away parts of the schema, like
+      "doc" attributes, that are irrelevant to readers trying to parse
+      incoming data.  It is called <em>Canonical Form</em> because the
+      transformations normalize the JSON text (such as the order of
+      attributes) in a way that eliminates unimportant differences
+      between schemas.  If the Parsing Canonical Forms of two
+      different schemas are textually equal, then those schemas are
+      "the same" as far as any reader is concerned, i.e., there is no
+      serialized data that would allow a reader to distinguish data
+      generated by a writer using one of the original schemas from
+      data generated by a writer using the other original schema.
+      (We sketch a proof of this property in a companion
+      document.)</p>
+<p>The next subsection specifies the transformations that define
+      Parsing Canonical Form.  But with a well-defined canonical form,
+      it can be convenient to go one step further, transforming these
+      canonical forms into simple integers ("fingerprints") that can
+      be used to uniquely identify schemas.  The subsection after next
+      recommends some standard practices for generating such
+      fingerprints.</p>
+<a name="Transforming+into+Parsing+Canonical+Form"></a>
+<h3 class="h4">Transforming into Parsing Canonical Form</h3>
+<p>Assuming an input schema (in JSON form) that's already
+        UTF-8 text for a <em>valid</em> Avro schema (including all
+        quotes as required by JSON), the following transformations
+        will produce its Parsing Canonical Form:</p>
+<ul>
+          
+<li> [PRIMITIVES] Convert primitive schemas to their simple
+          form (e.g., <span class="codefrag">int</span> instead of
+          <span class="codefrag">{"type":"int"}</span>).</li>
+
+          
+<li> [FULLNAMES] Replace short names with fullnames, using
+          applicable namespaces to do so.  Then eliminate
+          <span class="codefrag">namespace</span> attributes, which are now redundant.</li>
+
+          
+<li> [STRIP] Keep only attributes that are relevant to
+          parsing data, which are: <span class="codefrag">type</span>,
+          <span class="codefrag">name</span>, <span class="codefrag">fields</span>,
+          <span class="codefrag">symbols</span>, <span class="codefrag">items</span>,
+          <span class="codefrag">values</span>, <span class="codefrag">size</span>.  Strip all others
+          (e.g., <span class="codefrag">doc</span> and <span class="codefrag">aliases</span>).</li>
+
+          
+<li> [ORDER] Order the appearance of fields of JSON objects
+          as follows: <span class="codefrag">name</span>, <span class="codefrag">type</span>,
+          <span class="codefrag">fields</span>, <span class="codefrag">symbols</span>,
+          <span class="codefrag">items</span>, <span class="codefrag">values</span>, <span class="codefrag">size</span>.
+          For example, if an object has <span class="codefrag">type</span>,
+          <span class="codefrag">name</span>, and <span class="codefrag">size</span> fields, then the
+          <span class="codefrag">name</span> field should appear first, followed by the
+          <span class="codefrag">type</span> and then the <span class="codefrag">size</span> fields.</li>
+
+          
+<li> [STRINGS] For all JSON string literals in the schema
+          text, replace any escaped characters (e.g., \uXXXX escapes)
+          with their UTF-8 equivalents.</li>
+
+          
+<li> [INTEGERS] Eliminate quotes around and any leading
+          zeros in front of JSON integer literals (which appear in the
+          <span class="codefrag">size</span> attributes of <span class="codefrag">fixed</span> schemas).</li>
+
+          
+<li> [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.</li>
+        
+</ul>
+<a name="schema_fingerprints"></a>
+<h3 class="h4">Schema Fingerprints</h3>
+<p>"[A] fingerprinting algorithm is a procedure that maps an
+        arbitrarily large data item (such as a computer file) to a
+        much shorter bit string, its <em>fingerprint,</em> that
+        uniquely identifies the original data for all practical
+        purposes" (quoted from [<a href="http://en.wikipedia.org/wiki/Fingerprint_(computing)">Wikipedia</a>]).
+        In the Avro context, fingerprints of Parsing Canonical Form
+        can be useful in a number of applications; for example, to
+        cache encoder and decoder objects, to tag data items with a
+        short substitute for the writer's full schema, and to quickly
+        negotiate common-case schemas between readers and writers.</p>
+<p>In designing fingerprinting algorithms, there is a
+        fundamental trade-off between the length of the fingerprint
+        and the probability of collisions.  To help application
+        designers find appropriate points within this trade-off space,
+        while encouraging interoperability and ease of implementation,
+        we recommend using one of the following three algorithms when
+        fingerprinting Avro schemas:</p>
+<ul>
+          
+<li> When applications can tolerate longer fingerprints, we
+          recommend using the <a href="http://en.wikipedia.org/wiki/SHA-2">SHA-256 digest
+          algorithm</a> to generate 256-bit fingerprints of Parsing
+          Canonical Forms.  Most languages today have SHA-256
+          implementations in their libraries.</li>
+
+          
+<li> At the opposite extreme, the smallest fingerprint we
+          recommend is a 64-bit <a href="http://en.wikipedia.org/wiki/Rabin_fingerprint">Rabin
+          fingerprint</a>.  Below, we provide pseudo-code for this
+          algorithm that can be easily translated into any programming
+          language.  64-bit fingerprints should guarantee uniqueness
+          for schema caches of up to a million entries (for such a
+          cache, the chance of a collision is 3E-8).  We don't
+          recommend shorter fingerprints, as the chance of a collision
+          is too great (for example, with 32-bit fingerprints, a cache
+          with as few as 100,000 schemas has a 50% chance of having a
+          collision).</li>
+
+          
+<li>Between these two extremes, we recommend using the <a href="http://en.wikipedia.org/wiki/MD5">MD5 message
+          digest</a> to generate 128-bit fingerprints.  These make
+          sense only where very large numbers of schemas are being
+          manipulated (tens of millions); otherwise, 64-bit
+          fingerprints should be sufficient.  As with SHA-256, MD5
+          implementations are found in most libraries today.</li>
+        
+</ul>
+<p> These fingerprints are <em>not</em> meant to provide any
+        security guarantees, even the longer SHA-256-based ones.  Most
+        Avro applications should be surrounded by security measures
+        that prevent attackers from writing random data and otherwise
+        interfering with the consumers of schemas.  We recommend that
+        these surrounding mechanisms be used to prevent collision and
+        pre-image attacks (i.e., "forgery") on schema fingerprints,
+        rather than relying on the security properties of the
+        fingerprints themselves.</p>
+<p>Rabin fingerprints are <a href="http://en.wikipedia.org/wiki/Cyclic_redundancy_check">cyclic
+        redundancy checks</a> computed using irreducible polynomials.
+        In the style of the Appendix of <a href="http://www.ietf.org/rfc/rfc1952.txt">RFC&nbsp;1952</a>
+        (pg 10), which defines the CRC-32 algorithm, here's our
+        definition of the 64-bit AVRO fingerprinting algorithm:</p>
+<pre class="code">
+long fingerprint64(byte[] buf) {
+  if (FP_TABLE == null) initFPTable();
+  long fp = EMPTY;
+  for (int i = 0; i &lt; buf.length; i++)
+    fp = (fp &gt;&gt;&gt; 8) ^ FP_TABLE[(int)(fp ^ buf[i]) &amp; 0xff];
+  return fp;
+}
+
+static long EMPTY = 0xc15d213aa4d7a795L;
+static long[] FP_TABLE = null;
+
+void initFPTable() {
+  FP_TABLE = new long[256];
+  for (int i = 0; i &lt; 256; i++) {
+    long fp = i;
+    for (int j = 0; j &lt; 8; j++)
+      fp = (fp &gt;&gt;&gt; 1) ^ (EMPTY &amp; -(fp &amp; 1L));
+    FP_TABLE[i] = fp;
+  }
+}
+        </pre>
+<p> Readers interested in the mathematics behind this
+        algorithm may want to read <a href="http://www.scribd.com/fb-6001967/d/84795-Crc">this book
+        chapter.</a> (Unlike RFC-1952 and the book chapter, we prepend
+        a single one bit to messages.  We do this because CRCs ignore
+        leading zero bits, which can be problematic.  Our code
+        prepends a one-bit by initializing fingerprints using
+        <span class="codefrag">EMPTY</span>, rather than initializing using zero as in
+        RFC-1952 and the book chapter.)</p>
+</div>
+
+    
+<a name="Logical+Types"></a>
+<h2 class="h3">Logical Types</h2>
+<div class="section">
+<p>A logical type is an Avro primitive or complex type with extra attributes to
+        represent a derived type. The attribute <span class="codefrag">logicalType</span> must
+        always be present for a logical type, and is a string with the name of one of
+        the logical types listed later in this section. Other attributes may be defined
+        for particular logical types.</p>
+<p>A logical type is always serialized using its underlying Avro type so
+        that values are encoded in exactly the same way as the equivalent Avro
+        type that does not have a <span class="codefrag">logicalType</span> attribute. Language
+        implementations may choose to represent logical types with an
+        appropriate native type, although this is not required.</p>
+<p>Language implementations must ignore unknown logical types when
+        reading, and should use the underlying Avro type. If a logical type is
+        invalid, for example a decimal with scale greater than its precision,
+        then implementations should ignore the logical type and use the
+        underlying Avro type.</p>
+<a name="Decimal"></a>
+<h3 class="h4">Decimal</h3>
+<p>The <span class="codefrag">decimal</span> logical type represents an arbitrary-precision signed
+          decimal number of the form <em>unscaled &times; 10<sup>-scale</sup></em>.</p>
+<p>A <span class="codefrag">decimal</span> logical type annotates Avro
+          <span class="codefrag">bytes</span> or <span class="codefrag">fixed</span> types. The byte array must
+          contain the two's-complement representation of the unscaled integer
+          value in big-endian byte order. The scale is fixed, and is specified
+          using an attribute.</p>
+<p>The following attributes are supported:</p>
+<ul>
+          
+<li>
+<span class="codefrag">scale</span>, a JSON integer representing the scale
+            (optional). If not specified the scale is 0.</li>
+          
+<li>
+<span class="codefrag">precision</span>, a JSON integer representing the (maximum)
+            precision of decimals stored in this type (required).</li>
+        
+</ul>
+<p>For example, the following schema represents decimal numbers with a
+          maximum precision of 4 and a scale of 2:</p>
+<pre class="code">
+{
+  "type": "bytes",
+  "logicalType": "decimal",
+  "precision": 4,
+  "scale": 2
+}
+</pre>
+<p>Precision must be a positive integer greater than zero. If the
+          underlying type is a <span class="codefrag">fixed</span>, then the precision is
+          limited by its size. An array of length <span class="codefrag">n</span> can store at
+          most <em>floor(log_10(2<sup>8 &times; n - 1</sup> - 1))</em>
+          base-10 digits of precision.</p>
+<p>Scale must be zero or a positive integer less than or equal to the
+          precision.</p>
+<p>For the purposes of schema resolution, two schemas that are
+          <span class="codefrag">decimal</span> logical types <em>match</em> if their scales and
+          precisions match.</p>
+<a name="Date"></a>
+<h3 class="h4">Date</h3>
+<p>
+          The <span class="codefrag">date</span> logical type represents a date within the calendar, with no reference to a particular time zone or time of day.
+        </p>
+<p>
+          A <span class="codefrag">date</span> logical type annotates an Avro <span class="codefrag">int</span>, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar).
+        </p>
+<a name="Time+%28millisecond+precision%29"></a>
+<h3 class="h4">Time (millisecond precision)</h3>
+<p>
+          The <span class="codefrag">time-millis</span> logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond.
+        </p>
+<p>
+          A <span class="codefrag">time-millis</span> logical type annotates an Avro <span class="codefrag">int</span>, where the int stores the number of milliseconds after midnight, 00:00:00.000.
+        </p>
+<a name="Time+%28microsecond+precision%29"></a>
+<h3 class="h4">Time (microsecond precision)</h3>
+<p>
+          The <span class="codefrag">time-micros</span> logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond.
+        </p>
+<p>
+          A <span class="codefrag">time-micros</span> logical type annotates an Avro <span class="codefrag">long</span>, where the long stores the number of microseconds after midnight, 00:00:00.000000.
+        </p>
+<a name="Timestamp+%28millisecond+precision%29"></a>
+<h3 class="h4">Timestamp (millisecond precision)</h3>
+<p>
+          The <span class="codefrag">timestamp-millis</span> logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond.
+        </p>
+<p>
+          A <span class="codefrag">timestamp-millis</span> logical type annotates an Avro <span class="codefrag">long</span>, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC.
+        </p>
+<a name="Timestamp+%28microsecond+precision%29"></a>
+<h3 class="h4">Timestamp (microsecond precision)</h3>
+<p>
+          The <span class="codefrag">timestamp-micros</span> logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond.
+        </p>
+<p>
+          A <span class="codefrag">timestamp-micros</span> logical type annotates an Avro <span class="codefrag">long</span>, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC.
+        </p>
+<a name="Duration"></a>
+<h3 class="h4">Duration</h3>
+<p>
+          The <span class="codefrag">duration</span> logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods.
+        </p>
+<p>
+          A <span class="codefrag">duration</span> logical type annotates an Avro <span class="codefrag">fixed</span> type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds.
+        </p>
+</div>
+
+  
+<p>
+<em>Apache Avro, Avro, Apache, and the Avro and Apache logos are
+   trademarks of The Apache Software Foundation.</em>
+</p>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: avro/site/publish/docs/1.8.2/spec.pdf
URL: http://svn.apache.org/viewvc/avro/site/publish/docs/1.8.2/spec.pdf?rev=1797063&view=auto
==============================================================================
Binary file - no diff available.

Propchange: avro/site/publish/docs/1.8.2/spec.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream