You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/06/22 23:07:29 UTC

[5/8] orc git commit: Publish first version of site.

http://git-wip-us.apache.org/repos/asf/orc/blob/6a400548/docs/compression.html
----------------------------------------------------------------------
diff --git a/docs/compression.html b/docs/compression.html
new file mode 100644
index 0000000..f714469
--- /dev/null
+++ b/docs/compression.html
@@ -0,0 +1,1005 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+  <meta charset="UTF-8">
+  <title>Compression</title>
+  <meta name="viewport" content="width=device-width,initial-scale=1">
+  <meta name="generator" content="Jekyll v2.4.0">
+  <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+  <link rel="stylesheet" href="/css/screen.css">
+  <link rel="icon" type="image/x-icon" href="/favicon.ico">
+  <!--[if lt IE 9]>
+  <script src="/js/html5shiv.min.js"></script>
+  <script src="/js/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+
+<body class="wrap">
+  <header role="banner">
+  <nav class="mobile-nav show-on-mobiles">
+    <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+  </nav>
+  <div class="grid">
+    <div class="unit one-third center-on-mobiles">
+      <h1>
+        <a href="/">
+          <span class="sr-only">Apache ORC</span>
+          <img src="/img/logo.png" width="249" height="115" alt="ORC Logo">
+        </a>
+      </h1>
+    </div>
+    <nav class="main-nav unit two-thirds hide-on-mobiles">
+      <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+    </nav>
+  </div>
+</header>
+
+
+    <section class="docs">
+    <div class="grid">
+
+      <div class="docs-nav-mobile unit whole show-on-mobiles">
+  <select onchange="if (this.value) window.location.href=this.value">
+    <option value="">Navigate the docs…</option>
+    
+    <optgroup label="Overview">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/index.html">Background</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/types.html">Types</option>
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/indexes.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+      <option value="/docs/acid.html">ACID support</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Hive Usage">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Compression</h1>
+          <p>If the ORC file writer selects a generic compression codec (zlib or
+snappy), every part of the ORC file except for the Postscript is
+compressed with that codec. However, one of the requirements for ORC
+is that the reader be able to skip over compressed bytes without
+decompressing the entire stream. To manage this, ORC writes compressed
+streams in chunks with headers as in the figure below.
+To handle incompressible data, if the compressed data is larger than
+the original, the original is stored and the isOriginal flag is
+set. Each header is 3 bytes long with (compressedLength * 2 +
+isOriginal) stored as a little endian value. For example, the header
+for a chunk that compressed to 100,000 bytes would be [0x40, 0x0d,
+0x03]. The header for 5 bytes that did not compress would be [0x0b,
+0x00, 0x00]. Each compression chunk is compressed independently so
+that as long as a decompressor starts at the top of a header, it can
+start decompressing without the previous bytes.</p>
+
+<p><img src="/img/CompressionStream.png" alt="compression streams" /></p>
+
+<p>The default compression chunk size is 256K, but writers can choose
+their own value less than 2<sup>23</sup>. Larger chunks lead to better
+compression, but require more memory. The chunk size is recorded in
+the Postscript so that readers can allocate appropriately sized
+buffers.</p>
+
+<p>ORC files without generic compression write each stream directly
+with no headers.</p>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/file-tail.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/run-length.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Hive Usage</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class="current"><a href="/docs/compression.html">Compression</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      
+
+
+</ul>
+
+    
+  </aside>
+</div>
+
+
+      <div class="clear"></div>
+
+    </div>
+  </section>
+
+
+  <footer role="contentinfo">
+  <p>The contents of this website are &copy;&nbsp;2015
+     <a href="https://www.apache.org/">Apache Software Foundation</a>
+     under the terms of the <a
+      href="https://www.apache.org/licenses/LICENSE-2.0.html">
+      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
+      of the Apache Software Foundation.</p>
+</footer>
+
+  <script>
+  var anchorForId = function (id) { // Build the permalink <a> element that targets the heading with the given id.
+    var anchor = document.createElement("a");
+    anchor.className = "header-link";
+    anchor.href      = "#" + id; // fragment link back to the heading itself
+    anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; // text label (class "sr-only") followed by an icon element
+    anchor.title = "Permalink";
+    return anchor; // not attached to the DOM here; the caller appends it
+  };
+
+  var linkifyAnchors = function (level, containingElement) { // Append a permalink to every <h{level}> inside containingElement that has an id.
+    var headers = containingElement.getElementsByTagName("h" + level);
+    for (var h = 0; h < headers.length; h++) {
+      var header = headers[h];
+      // Headings without an id have no fragment to link to, so they are skipped.
+      if (typeof header.id !== "undefined" && header.id !== "") {
+        header.appendChild(anchorForId(header.id));
+      }
+    }
+  };
+
+  document.onreadystatechange = function () { // When the document reaches the "complete" state, add permalinks to the content headings.
+    if (this.readyState === "complete") {
+      var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; // first "docs" element, else first "news" element
+      if (!contentBlock) {
+        return; // neither container exists on this page; nothing to linkify
+      }
+      for (var level = 1; level <= 6; level++) { // h1 through h6
+        linkifyAnchors(level, contentBlock);
+      }
+    }
+  };
+</script>
+
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/orc/blob/6a400548/docs/encodings.html
----------------------------------------------------------------------
diff --git a/docs/encodings.html b/docs/encodings.html
new file mode 100644
index 0000000..d7a0c73
--- /dev/null
+++ b/docs/encodings.html
@@ -0,0 +1,1600 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+  <meta charset="UTF-8">
+  <title>Column Encodings</title>
+  <meta name="viewport" content="width=device-width,initial-scale=1">
+  <meta name="generator" content="Jekyll v2.4.0">
+  <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+  <link rel="stylesheet" href="/css/screen.css">
+  <link rel="icon" type="image/x-icon" href="/favicon.ico">
+  <!--[if lt IE 9]>
+  <script src="/js/html5shiv.min.js"></script>
+  <script src="/js/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+
+<body class="wrap">
+  <header role="banner">
+  <nav class="mobile-nav show-on-mobiles">
+    <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+  </nav>
+  <div class="grid">
+    <div class="unit one-third center-on-mobiles">
+      <h1>
+        <a href="/">
+          <span class="sr-only">Apache ORC</span>
+          <img src="/img/logo.png" width="249" height="115" alt="ORC Logo">
+        </a>
+      </h1>
+    </div>
+    <nav class="main-nav unit two-thirds hide-on-mobiles">
+      <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+    </nav>
+  </div>
+</header>
+
+
+    <section class="docs">
+    <div class="grid">
+
+      <div class="docs-nav-mobile unit whole show-on-mobiles">
+  <select onchange="if (this.value) window.location.href=this.value">
+    <option value="">Navigate the docs…</option>
+    
+    <optgroup label="Overview">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/index.html">Background</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/types.html">Types</option>
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/indexes.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+      <option value="/docs/acid.html">ACID support</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Hive Usage">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Column Encodings</h1>
+          <h2 id="smallint-int-and-bigint-columns">SmallInt, Int, and BigInt Columns</h2>
+
+<p>All of the 16, 32, and 64 bit integer column types use the same set of
+potential encodings, which is basically whether they use RLE v1 or
+v2. If the PRESENT stream is not included, all of the values are
+present. For values that have false bits in the present stream, no
+values are included in the data stream.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="float-and-double-columns">Float and Double Columns</h2>
+
+<p>Floating point types are stored using IEEE 754 floating point bit
+layout. Float columns use 4 bytes per value and double columns use 8
+bytes.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">IEEE 754 floating point representation</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="string-char-and-varchar-columns">String, Char, and VarChar Columns</h2>
+
+<p>String columns are adaptively encoded based on whether the first
+10,000 values are sufficiently distinct. In all of the encodings, the
+PRESENT stream encodes whether the value is null.</p>
+
+<p>For direct encoding the UTF-8 bytes are saved in the DATA stream and
+the length of each value is written into the LENGTH stream. In direct
+encoding, if the values were [“Nevada”, “California”]; the DATA
+would be “NevadaCalifornia” and the LENGTH would be [6, 10].</p>
+
+<p>For dictionary encodings the dictionary is sorted and UTF-8 bytes of
+each unique value are placed into DICTIONARY_DATA. The length of each
+item in the dictionary is put into the LENGTH stream. The DATA stream
+consists of the sequence of references to the dictionary elements.</p>
+
+<p>In dictionary encoding, if the values were [“Nevada”,
+“California”, “Nevada”, “California”, and “Florida”]; the
+DICTIONARY_DATA would be “CaliforniaFloridaNevada” and LENGTH would
+be [10, 7, 6]. The DATA would be [2, 0, 2, 0, 1].</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DICTIONARY</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DICTIONARY_DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DICTIONARY_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DICTIONARY_DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="boolean-columns">Boolean Columns</h2>
+
+<p>Boolean columns are rare, but have a simple encoding.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="tinyint-columns">TinyInt Columns</h2>
+
+<p>TinyInt (byte) columns use byte run length encoding.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Byte RLE</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="binary-columns">Binary Columns</h2>
+
+<p>Binary data is encoded with a PRESENT stream, a DATA stream that records
+the contents, and a LENGTH stream that records the number of bytes per
+value.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">String contents</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="decimal-columns">Decimal Columns</h2>
+
+<p>Decimal was introduced in Hive 0.11 with infinite precision (the total
+number of digits). In Hive 0.13, the definition was changed to limit
+the precision to a maximum of 38 digits, which conveniently uses 127
+bits plus a sign bit. The current encoding of decimal columns stores
+the integer representation of the value as an unbounded length zigzag
+encoded base 128 varint. The scale is stored in the SECONDARY stream
+as an unsigned integer.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unbounded base 128 varints</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">SECONDARY</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unbounded base 128 varints</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">SECONDARY</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="date-columns">Date Columns</h2>
+
+<p>Date data is encoded with a PRESENT stream and a DATA stream that records
+the number of days after January 1, 1970 in UTC.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="timestamp-columns">Timestamp Columns</h2>
+
+<p>Timestamp records times down to nanoseconds as a PRESENT stream that
+records non-null values, a DATA stream that records the number of
+seconds after 1 January 2015, and a SECONDARY stream that records the
+number of nanoseconds.</p>
+
+<p>Because the number of nanoseconds often has a large number of trailing
+zeros, the number has trailing decimal zero digits removed and the
+last three bits are used to record how many zeros were removed (stored as
+the count minus one; fewer than two zeros are never removed). Thus
+1000 nanoseconds would be serialized as 0x0a and 100000 as 0x0c.
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">SECONDARY</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Signed Integer RLE v2</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">SECONDARY</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="struct-columns">Struct Columns</h2>
+
+<p>Structs have no data themselves and delegate everything to their child
+columns except for their PRESENT stream. They have a child column
+for each of the fields.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="list-columns">List Columns</h2>
+
+<p>Lists are encoded as the PRESENT stream and a length stream with the
+number of items in each list. They have a single child column for the
+element values.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="map-columns">Map Columns</h2>
+
+<p>Maps are encoded as the PRESENT stream and a length stream with the number
+of items in each map. They have a child column for the key and
+another child column for the value.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v1</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">DIRECT_V2</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">LENGTH</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Unsigned Integer RLE v2</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="union-columns">Union Columns</h2>
+
+<p>Unions are encoded as the PRESENT stream and a tag stream that controls which
+potential variant is used. They have a child column for each variant of the
+union. Currently ORC union types are limited to 256 variants, which matches
+the Hive type model.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Encoding</th>
+      <th style="text-align: left">Stream Kind</th>
+      <th style="text-align: left">Optional</th>
+      <th style="text-align: left">Contents</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">DIRECT</td>
+      <td style="text-align: left">PRESENT</td>
+      <td style="text-align: left">Yes</td>
+      <td style="text-align: left">Boolean RLE</td>
+    </tr>
+    <tr>
+      <td style="text-align: left"> </td>
+      <td style="text-align: left">DATA</td>
+      <td style="text-align: left">No</td>
+      <td style="text-align: left">Byte RLE</td>
+    </tr>
+  </tbody>
+</table>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/stripes.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/spec-index.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Hive Usage</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/compression.html">Compression</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/encodings.html">Column Encodings</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      
+
+
+</ul>
+
+    
+  </aside>
+</div>
+
+
+      <div class="clear"></div>
+
+    </div>
+  </section>
+
+
+  <footer role="contentinfo">
+  <p>The contents of this website are &copy;&nbsp;2015
+     <a href="https://www.apache.org/">Apache Software Foundation</a>
+     under the terms of the <a
+      href="https://www.apache.org/licenses/LICENSE-2.0.html">
+      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
+      of the Apache Software Foundation.</p>
+</footer>
+
+  <script>
+  var anchorForId = function (id) { // Builds and returns an <a> element whose href is "#" + id.
+    var anchor = document.createElement("a");
+    anchor.className = "header-link";
+    anchor.href      = "#" + id; // fragment-only URL, so it targets the element with this id on the current page
+    anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; // fixed markup: a text label plus an icon element (styling comes from CSS not shown here)
+    anchor.title = "Permalink";
+    return anchor; // not inserted into the document here; the caller is responsible for attaching it
+  };
+
+  var linkifyAnchors = function (level, containingElement) { // Appends a permalink anchor to every <h{level}> inside containingElement that has a non-empty id.
+    var headers = containingElement.getElementsByTagName("h" + level);
+    for (var h = 0; h < headers.length; h++) {
+      var header = headers[h];
+
+      if (typeof header.id !== "undefined" && header.id !== "") { // headings without an id are skipped: there is nothing to link to
+        header.appendChild(anchorForId(header.id));
+      }
+    }
+  };
+
+  document.onreadystatechange = function () { // Called on each readyState change; only does work once the state is "complete".
+    if (this.readyState === "complete") {
+      var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; // first element with class "docs", otherwise first with class "news"
+      if (!contentBlock) { // neither container exists on this page, so there are no headings to process
+        return;
+      }
+      for (var level = 1; level <= 6; level++) { // covers h1 through h6
+        linkifyAnchors(level, contentBlock);
+      }
+    }
+  };
+</script>
+
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/orc/blob/6a400548/docs/file-tail.html
----------------------------------------------------------------------
diff --git a/docs/file-tail.html b/docs/file-tail.html
new file mode 100644
index 0000000..dc29a0c
--- /dev/null
+++ b/docs/file-tail.html
@@ -0,0 +1,1288 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+  <meta charset="UTF-8">
+  <title>File Tail</title>
+  <meta name="viewport" content="width=device-width,initial-scale=1">
+  <meta name="generator" content="Jekyll v2.4.0">
+  <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+  <link rel="stylesheet" href="/css/screen.css">
+  <link rel="icon" type="image/x-icon" href="/favicon.ico">
+  <!--[if lt IE 9]>
+  <script src="/js/html5shiv.min.js"></script>
+  <script src="/js/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+
+<body class="wrap">
+  <header role="banner">
+  <nav class="mobile-nav show-on-mobiles">
+    <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+  </nav>
+  <div class="grid">
+    <div class="unit one-third center-on-mobiles">
+      <h1>
+        <a href="/">
+          <span class="sr-only">Apache ORC</span>
+          <img src="/img/logo.png" width="249" height="115" alt="ORC Logo">
+        </a>
+      </h1>
+    </div>
+    <nav class="main-nav unit two-thirds hide-on-mobiles">
+      <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+    </nav>
+  </div>
+</header>
+
+
+    <section class="docs">
+    <div class="grid">
+
+      <div class="docs-nav-mobile unit whole show-on-mobiles">
+  <select onchange="if (this.value) window.location.href=this.value">
+    <option value="">Navigate the docs…</option>
+    
+    <optgroup label="Overview">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/index.html">Background</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/types.html">Types</option>
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/indexes.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+      <option value="/docs/acid.html">ACID support</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Hive Usage">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>File Tail</h1>
+          <p>Since HDFS does not support changing the data in a file after it is
+written, ORC stores the top level index at the end of the file. The
+overall structure of the file is given in the figure above.  The
+file’s tail consists of 3 parts: the file metadata, file footer, and
+postscript.</p>
+
+<p>The metadata for ORC is stored using
+<a href="http://s.apache.org/protobuf_encoding">Protocol Buffers</a>, which provides
+the ability to add new fields without breaking readers. This document
+incorporates the Protobuf definition from the
+<a href="http://s.apache.org/orc_proto">ORC source code</a> and the
+reader is encouraged to review the Protobuf encoding if they need to
+understand the byte-level encoding.</p>
+
+<h1 id="postscript">Postscript</h1>
+
+<p>The Postscript section provides the necessary information to interpret
+the rest of the file including the length of the file’s Footer and
+Metadata sections, the version of the file, and the kind of general
+compression used (e.g. none, zlib, or snappy). The Postscript is never
+compressed and ends one byte before the end of the file. The version
+stored in the Postscript is the lowest version of Hive that is
+guaranteed to be able to read the file and it is stored as a sequence of
+the major and minor version. There are currently two versions that are
+used: [0,11] for Hive 0.11, and [0,12] for Hive 0.12 or later.</p>
+
+<p>The process of reading an ORC file works backwards through the
+file. Rather than making multiple short reads, the ORC reader reads
+the last 16k bytes of the file with the hope that it will contain both
+the Footer and Postscript sections. The final byte of the file
+contains the serialized length of the Postscript, which must be less
+than 256 bytes. Once the Postscript is parsed, the compressed
+serialized length of the Footer is known and it can be decompressed
+and parsed.</p>
+
+<p><code>message PostScript {
+ // the length of the footer section in bytes
+ optional uint64 footerLength = 1;
+ // the kind of generic compression used
+ optional CompressionKind compression = 2;
+ // the maximum size of each compression chunk
+ optional uint64 compressionBlockSize = 3;
+ // the version of the writer
+ repeated uint32 version = 4 [packed = true];
+ // the length of the metadata section in bytes
+ optional uint64 metadataLength = 5;
+ // the fixed string "ORC"
+ optional string magic = 8000;
+}
+</code></p>
+
+<p><code>enum CompressionKind {
+ NONE = 0;
+ ZLIB = 1;
+ SNAPPY = 2;
+ LZO = 3;
+}
+</code></p>
+
+<h1 id="footer">Footer</h1>
+
+<p>The Footer section contains the layout of the body of the file, the
+type schema information, the number of rows, and the statistics about
+each of the columns.</p>
+
+<p>The file is broken into three parts: Header, Body, and Tail. The
+Header consists of the bytes “ORC” to support tools that want to
+scan the front of the file to determine the type of the file. The Body
+contains the rows and indexes, and the Tail gives the file level
+information as described in this section.</p>
+
+<p><code>message Footer {
+ // the length of the file header in bytes (always 3)
+ optional uint64 headerLength = 1;
+ // the length of the file header and body in bytes
+ optional uint64 contentLength = 2;
+ // the information about the stripes
+ repeated StripeInformation stripes = 3;
+ // the schema information
+ repeated Type types = 4;
+ // the user metadata that was added
+ repeated UserMetadataItem metadata = 5;
+ // the total number of rows in the file
+ optional uint64 numberOfRows = 6;
+ // the statistics of each column across the file
+ repeated ColumnStatistics statistics = 7;
+ // the maximum number of rows in each index entry
+ optional uint32 rowIndexStride = 8;
+}
+</code></p>
+
+<h2 id="stripe-information">Stripe Information</h2>
+
+<p>The body of the file is divided into stripes. Each stripe is self
+contained and may be read using only its own bytes combined with the
+file’s Footer and Postscript. Each stripe contains only entire rows so
+that rows never straddle stripe boundaries. Stripes have three
+sections: a set of indexes for the rows within the stripe, the data
+itself, and a stripe footer. Both the indexes and the data sections
+are divided by columns so that only the data for the required columns
+needs to be read.</p>
+
+<p><code>message StripeInformation {
+ // the start of the stripe within the file
+ optional uint64 offset = 1;
+ // the length of the indexes in bytes
+ optional uint64 indexLength = 2;
+ // the length of the data in bytes
+ optional uint64 dataLength = 3;
+ // the length of the footer in bytes
+ optional uint64 footerLength = 4;
+ // the number of rows in the stripe
+ optional uint64 numberOfRows = 5;
+}
+</code></p>
+
+<h2 id="type-information">Type Information</h2>
+
+<p>All of the rows in an ORC file must have the same schema. Logically
+the schema is expressed as a tree as in the figure below, where
+the compound types have subcolumns under them.</p>
+
+<p><img src="/img/TreeWriters.png" alt="ORC column structure" /></p>
+
+<p>The equivalent Hive DDL would be:</p>
+
+<p><code>create table Foobar (
+ myInt int,
+ myMap map&lt;string,
+ struct&lt;myString : string,
+ myDouble: double&gt;&gt;,
+ myTime timestamp
+);
+</code></p>
+
+<p>The type tree is flattened into a list via a pre-order traversal
+where each type is assigned the next id. Clearly the root of the type
+tree is always type id 0. Compound types have a field named subtypes
+that contains the list of their children’s type ids.</p>
+
+<p><code>message Type {
+ enum Kind {
+ BOOLEAN = 0;
+ BYTE = 1;
+ SHORT = 2;
+ INT = 3;
+ LONG = 4;
+ FLOAT = 5;
+ DOUBLE = 6;
+ STRING = 7;
+ BINARY = 8;
+ TIMESTAMP = 9;
+ LIST = 10;
+ MAP = 11;
+ STRUCT = 12;
+ UNION = 13;
+ DECIMAL = 14;
+ DATE = 15;
+ VARCHAR = 16;
+ CHAR = 17;
+ }
+ // the kind of this type
+ required Kind kind = 1;
+ // the type ids of any subcolumns for list, map, struct, or union
+ repeated uint32 subtypes = 2 [packed=true];
+ // the list of field names for struct
+ repeated string fieldNames = 3;
+ // the maximum length of the type for varchar or char
+ optional uint32 maximumLength = 4;
+ // the precision and scale for decimal
+ optional uint32 precision = 5;
+ optional uint32 scale = 6;
+}
+</code></p>
+
+<h2 id="column-statistics">Column Statistics</h2>
+
+<p>The goal of the column statistics is that for each column, the writer
+records the count and depending on the type other useful fields. For
+most of the primitive types, it records the minimum and maximum
+values; and for numeric types it additionally stores the sum.
+From Hive 1.1.0 onwards, the column statistics will also record if
+there are any null values within the row group by setting the hasNull flag.
+The hasNull flag is used by ORC’s predicate pushdown to better answer
+‘IS NULL’ queries.</p>
+
+<p><code>message ColumnStatistics {
+ // the number of values
+ optional uint64 numberOfValues = 1;
+ // At most one of these has a value for any column
+ optional IntegerStatistics intStatistics = 2;
+ optional DoubleStatistics doubleStatistics = 3;
+ optional StringStatistics stringStatistics = 4;
+ optional BucketStatistics bucketStatistics = 5;
+ optional DecimalStatistics decimalStatistics = 6;
+ optional DateStatistics dateStatistics = 7;
+ optional BinaryStatistics binaryStatistics = 8;
+ optional TimestampStatistics timestampStatistics = 9;
+ optional bool hasNull = 10;
+}
+</code></p>
+
+<p>For integer types (tinyint, smallint, int, bigint), the column
+statistics includes the minimum, maximum, and sum. If the sum
+overflows long at any point during the calculation, no sum is
+recorded.</p>
+
+<p><code>message IntegerStatistics {
+ optional sint64 minimum = 1;
+ optional sint64 maximum = 2;
+ optional sint64 sum = 3;
+}
+</code></p>
+
+<p>For floating point types (float, double), the column statistics
+include the minimum, maximum, and sum. If the sum overflows a double,
+no sum is recorded.</p>
+
+<p><code>message DoubleStatistics {
+ optional double minimum = 1;
+ optional double maximum = 2;
+ optional double sum = 3;
+}
+</code></p>
+
+<p>For strings, the minimum value, maximum value, and the sum of the
+lengths of the values are recorded.</p>
+
+<p><code>message StringStatistics {
+ optional string minimum = 1;
+ optional string maximum = 2;
+ // sum will store the total length of all strings
+ optional sint64 sum = 3;
+}
+</code></p>
+
+<p>For booleans, the statistics include the count of false and true values.</p>
+
+<p><code>message BucketStatistics {
+ repeated uint64 count = 1 [packed=true];
+}
+</code></p>
+
+<p>For decimals, the minimum, maximum, and sum are stored.</p>
+
+<p><code>message DecimalStatistics {
+ optional string minimum = 1;
+ optional string maximum = 2;
+ optional string sum = 3;
+}
+</code></p>
+
+<p>Date columns record the minimum and maximum values as the number of
+days since the epoch (1/1/1970).</p>
+
+<p><code>message DateStatistics {
+ // min,max values saved as days since epoch
+ optional sint32 minimum = 1;
+ optional sint32 maximum = 2;
+}
+</code></p>
+
+<p>Timestamp columns record the minimum and maximum values as the number of
+milliseconds since the epoch (1/1/1970).</p>
+
+<p><code>message TimestampStatistics {
+ // min,max values saved as milliseconds since epoch
+ optional sint64 minimum = 1;
+ optional sint64 maximum = 2;
+}
+</code></p>
+
+<p>Binary columns store the aggregate number of bytes across all of the values.</p>
+
+<p><code>message BinaryStatistics {
+ // sum will store the total binary blob length
+ optional sint64 sum = 1;
+}
+</code></p>
+
+<h2 id="user-metadata">User Metadata</h2>
+
+<p>The user can add arbitrary key/value pairs to an ORC file as it is
+written. The contents of the keys and values are completely
+application defined, but the key is a string and the value is
+binary. Care should be taken by applications to make sure that their
+keys are unique and in general should be prefixed with an organization
+code.</p>
+
+<p><code>message UserMetadataItem {
+ // the user defined key
+ required string name = 1;
+ // the user defined binary value
+ required bytes value = 2;
+}
+</code></p>
+
+<h2 id="file-metadata">File Metadata</h2>
+
+<p>The file Metadata section contains column statistics at the stripe
+level granularity. These statistics enable input split elimination
+based on the predicate push-down evaluated per stripe.</p>
+
+<p><code>message StripeStatistics {
+ repeated ColumnStatistics colStats = 1;
+}
+</code></p>
+
+<p><code>message Metadata {
+ repeated StripeStatistics stripeStats = 1;
+}
+</code></p>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/spec-intro.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/compression.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Hive Usage</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/compression.html">Compression</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      
+
+
+</ul>
+
+    
+  </aside>
+</div>
+
+
+      <div class="clear"></div>
+
+    </div>
+  </section>
+
+
+  <footer role="contentinfo">
+  <p>The contents of this website are &copy;&nbsp;2015
+     <a href="https://www.apache.org/">Apache Software Foundation</a>
+     under the terms of the <a
+      href="https://www.apache.org/licenses/LICENSE-2.0.html">
+      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
+      of the Apache Software Foundation.</p>
+</footer>
+
+  <script>
+  var anchorForId = function (id) { // Builds and returns an <a> element whose href is "#" + id.
+    var anchor = document.createElement("a");
+    anchor.className = "header-link";
+    anchor.href      = "#" + id; // fragment-only URL, so it targets the element with this id on the current page
+    anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; // fixed markup: a text label plus an icon element (styling comes from CSS not shown here)
+    anchor.title = "Permalink";
+    return anchor; // not inserted into the document here; the caller is responsible for attaching it
+  };
+
+  var linkifyAnchors = function (level, containingElement) { // Appends a permalink anchor to every <h{level}> inside containingElement that has a non-empty id.
+    var headers = containingElement.getElementsByTagName("h" + level);
+    for (var h = 0; h < headers.length; h++) {
+      var header = headers[h];
+
+      if (typeof header.id !== "undefined" && header.id !== "") { // headings without an id are skipped: there is nothing to link to
+        header.appendChild(anchorForId(header.id));
+      }
+    }
+  };
+
+  document.onreadystatechange = function () { // Called on each readyState change; only does work once the state is "complete".
+    if (this.readyState === "complete") {
+      var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; // first element with class "docs", otherwise first with class "news"
+      if (!contentBlock) { // neither container exists on this page, so there are no headings to process
+        return;
+      }
+      for (var level = 1; level <= 6; level++) { // covers h1 through h6
+        linkifyAnchors(level, contentBlock);
+      }
+    }
+  };
+</script>
+
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/orc/blob/6a400548/docs/hive-config.html
----------------------------------------------------------------------
diff --git a/docs/hive-config.html b/docs/hive-config.html
new file mode 100644
index 0000000..3bd72e4
--- /dev/null
+++ b/docs/hive-config.html
@@ -0,0 +1,1184 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+  <meta charset="UTF-8">
+  <title>Hive Configuration</title>
+  <meta name="viewport" content="width=device-width,initial-scale=1">
+  <meta name="generator" content="Jekyll v2.4.0">
+  <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+  <link rel="stylesheet" href="/css/screen.css">
+  <link rel="icon" type="image/x-icon" href="/favicon.ico">
+  <!--[if lt IE 9]>
+  <script src="/js/html5shiv.min.js"></script>
+  <script src="/js/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+
+<body class="wrap">
+  <header role="banner">
+  <nav class="mobile-nav show-on-mobiles">
+    <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+  </nav>
+  <div class="grid">
+    <div class="unit one-third center-on-mobiles">
+      <h1>
+        <a href="/">
+          <span class="sr-only">Apache ORC</span>
+          <img src="/img/logo.png" width="249" height="115" alt="ORC Logo">
+        </a>
+      </h1>
+    </div>
+    <nav class="main-nav unit two-thirds hide-on-mobiles">
+      <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Documentation</a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+    </nav>
+  </div>
+</header>
+
+
+    <section class="docs">
+    <div class="grid">
+
+      <div class="docs-nav-mobile unit whole show-on-mobiles">
+  <select onchange="if (this.value) window.location.href=this.value">
+    <option value="">Navigate the docs…</option>
+    
+    <optgroup label="Overview">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/index.html">Background</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/types.html">Types</option>
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/indexes.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+      <option value="/docs/acid.html">ACID support</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Hive Usage">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Hive Configuration</h1>
+          <h2 id="table-properties">Table properties</h2>
+
+<p>Tables stored as ORC files use table properties to control their behavior. By
+using table properties, the table owner ensures that all clients store data
+with the same options.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th style="text-align: left">Key</th>
+      <th style="text-align: left">Default</th>
+      <th style="text-align: left">Notes</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td style="text-align: left">orc.compress</td>
+      <td style="text-align: left">ZLIB</td>
+      <td style="text-align: left">high level compression = {NONE, ZLIB, SNAPPY}</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.compress.size</td>
+      <td style="text-align: left">262,144</td>
+      <td style="text-align: left">compression chunk size</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.stripe.size</td>
+      <td style="text-align: left">268,435,456</td>
+      <td style="text-align: left">memory buffer size in bytes for writing</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.row.index.stride</td>
+      <td style="text-align: left">10,000</td>
+      <td style="text-align: left">number of rows between index entries</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.create.index</td>
+      <td style="text-align: left">true</td>
+      <td style="text-align: left">create indexes?</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.bloom.filter.columns</td>
+      <td style="text-align: left">""</td>
+      <td style="text-align: left">comma separated list of column names</td>
+    </tr>
+    <tr>
+      <td style="text-align: left">orc.bloom.filter.fpp</td>
+      <td style="text-align: left">0.05</td>
+      <td style="text-align: left">bloom filter false positive rate</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>For example, to create an ORC table without high level compression:</p>
+
+<p><code>CREATE TABLE istari (
+  name STRING,
+  color STRING
+) STORED AS ORC TBLPROPERTIES ("orc.compress"="NONE");
+</code></p>
+
+<h2 id="configuration-properties">Configuration properties</h2>
+
+<p>There are many Hive configuration properties related to ORC files:</p>
+
+<table class="configtable">
+<tr>
+  <th>Key</th>
+  <th>Default</th>
+  <th>Notes</th>
+</tr>
+<tr>
+  <td>hive.default.fileformat</td>
+  <td>TextFile</td>
+  <td>This is the default file format for new tables. If it is set to ORC,
+      new tables will default to ORC.</td>
+</tr>
+<tr>
+  <td>hive.stats.gather.num.threads</td>
+  <td>10</td>
+  <td>Number of threads used by partialscan/noscan analyze command for
+      partitioned tables. This is applicable only for file formats that
+      implement the StatsProvidingRecordReader interface (like ORC).</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.memory.pool</td>
+  <td>0.5</td>
+  <td>Maximum fraction of heap that can be used by ORC file writers.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.write.format</td>
+  <td>NULL</td>
+  <td>Define the version of the file to write. Possible values are 0.11 and
+      0.12. If this parameter is not defined, ORC will use the latest
+      version.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.stripe.size</td>
+  <td>67,108,864</td>
+  <td>Define the default size of ORC writer buffers in bytes.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.block.size</td>
+  <td>268,435,456</td>
+  <td>Define the default file system block size for ORC files.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.dictionary.key.size.threshold</td>
+  <td>0.8</td>
+  <td>If the number of keys in a dictionary is greater than this
+      fraction of the total number of non-null rows, turn off
+      dictionary encoding. Use 1.0 to always use dictionary encoding.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.row.index.stride</td>
+  <td>10,000</td>
+  <td>Define the default number of rows between row index entries.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.buffer.size</td>
+  <td>262,144</td>
+  <td>Define the default ORC buffer size, in bytes.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.block.padding</td>
+  <td>true</td>
+  <td>Should ORC file writers pad stripes to minimize stripes that cross HDFS
+      block boundaries.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.block.padding.tolerance</td>
+  <td>0.05</td>
+  <td>Define the tolerance for block padding as a decimal fraction of
+      stripe size (for example, the default value 0.05 is 5% of the
+      stripe size). For the defaults of 64MB ORC stripe and 256MB HDFS
+      blocks, a maximum of 3.2MB will be reserved for padding within
+      the 256MB block with the default
+      hive.exec.orc.block.padding.tolerance. In that case, if the
+      available size within the block is more than 3.2MB, a new
+      smaller stripe will be inserted to fit within that space. This
+      will make sure that no stripe written will cross block
+      boundaries and cause remote reads within a node local task.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.default.compress</td>
+  <td>ZLIB</td>
+  <td>Define the default compression codec for ORC file.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.encoding.strategy</td>
+  <td>SPEED</td>
+  <td>Define the encoding strategy to use while writing data. Changing
+      this will only affect the light weight encoding for
+      integers. This flag will not change the compression level of
+      higher level compression codec (like ZLIB). Possible options are
+      SPEED and COMPRESSION.</td>
+</tr>
+<tr>
+  <td>hive.orc.splits.include.file.footer</td>
+  <td>false</td>
+  <td>If turned on, splits generated by ORC will include metadata
+      about the stripes in the file. This data is read remotely (from
+      the client or HiveServer2 machine) and sent to all the tasks.</td>
+</tr>
+<tr>
+  <td>hive.orc.cache.stripe.details.size</td>
+  <td>10,000</td>
+  <td>Cache size for keeping meta information about ORC splits cached in the
+      client.</td>
+</tr>
+<tr>
+  <td>hive.orc.compute.splits.num.threads</td>
+  <td>10</td>
+  <td>How many threads ORC should use to create splits in parallel.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.skip.corrupt.data</td>
+  <td>false</td>
+  <td>If ORC reader encounters corrupt data, this value will be used
+      to determine whether to skip the corrupt data or throw an
+      exception. The default behavior is to throw an exception.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.zerocopy</td>
+  <td>false</td>
+  <td>Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)</td>
+</tr>
+<tr>
+  <td>hive.merge.orcfile.stripe.level</td>
+  <td>true</td>
+  <td>When hive.merge.mapfiles, hive.merge.mapredfiles or
+      hive.merge.tezfiles is enabled while writing a table with ORC
+      file format, enabling this configuration property will do
+      stripe-level fast merge for small ORC files. Note that enabling
+      this configuration property will not honor the padding tolerance
+      configuration (hive.exec.orc.block.padding.tolerance).</td>
+</tr>
+<tr>
+  <td>hive.orc.row.index.stride.dictionary.check</td>
+  <td>true</td>
+  <td>If enabled dictionary check will happen after first row index stride
+      (default 10000 rows) else dictionary check will happen before writing
+      first stripe. In both cases, the decision to use dictionary or not will
+      be retained thereafter.</td>
+</tr>
+<tr>
+  <td>hive.exec.orc.compression.strategy</td>
+  <td>SPEED</td>
+  <td>Define the compression strategy to use while writing data. This changes
+      the compression level of higher level compression codec. Value can be
+      SPEED or COMPRESSION.</td>
+</tr>
+
+</table>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/hive-ddl.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/spec-intro.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Hive Usage</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/compression.html">Compression</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      
+
+
+</ul>
+
+    
+  </aside>
+</div>
+
+
+      <div class="clear"></div>
+
+    </div>
+  </section>
+
+
+  <footer role="contentinfo">
+  <p>The contents of this website are &copy;&nbsp;2015
+     <a href="https://www.apache.org/">Apache Software Foundation</a>
+     under the terms of the <a
+      href="https://www.apache.org/licenses/LICENSE-2.0.html">
+      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
+      of the Apache Software Foundation.</p>
+</footer>
+
+  <script>
+  var anchorForId = function (id) { // Builds a detached "header-link" anchor whose href is "#" + id and returns it.
+    var anchor = document.createElement("a");
+    anchor.className = "header-link";
+    anchor.href      = "#" + id; // in-page fragment link to the element carrying this id
+    anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; // "sr-only" label plus an icon element; presumably screen-reader text and an icon-font glyph (confirm against the site CSS)
+    anchor.title = "Permalink";
+    return anchor; // not inserted into the document here; the caller attaches it
+  };
+
+  var linkifyAnchors = function (level, containingElement) { // Appends a permalink anchor to every heading of the given level inside containingElement that has a non-empty id.
+    var headers = containingElement.getElementsByTagName("h" + level); // e.g. level 2 selects all h2 descendants
+    for (var h = 0; h < headers.length; h++) {
+      var header = headers[h];
+
+      if (typeof header.id !== "undefined" && header.id !== "") { // headings without an id have no fragment to link to, so they are skipped
+        header.appendChild(anchorForId(header.id));
+      }
+    }
+  };
+
+  document.onreadystatechange = function () { // Runs on each readyState change; adds heading permalinks once the document reports "complete".
+    if (this.readyState === "complete") { // `this` is expected to be the document the handler is assigned to
+      var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; // first "docs" element, otherwise first "news" element
+      if (!contentBlock) {
+        return; // neither container exists on this page, so there is nothing to linkify
+      }
+      for (var level = 1; level <= 6; level++) { // heading levels h1 through h6
+        linkifyAnchors(level, contentBlock);
+      }
+    }
+  };
+</script>
+
+
+</body>
+</html>