You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/06/11 18:15:57 UTC
[5/6] orc git commit: Pushing update for ORC-65
http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/core-java.html
----------------------------------------------------------------------
diff --git a/docs/core-java.html b/docs/core-java.html
new file mode 100644
index 0000000..3458be9
--- /dev/null
+++ b/docs/core-java.html
@@ -0,0 +1,2167 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+ <meta charset="UTF-8">
+ <title>Using Core Java</title>
+ <meta name="viewport" content="width=device-width,initial-scale=1">
+ <meta name="generator" content="Jekyll v2.4.0">
+ <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+ <link rel="stylesheet" href="/css/screen.css">
+ <link rel="icon" type="image/x-icon" href="/favicon.ico">
+ <!--[if lt IE 9]>
+ <script src="/js/html5shiv.min.js"></script>
+ <script src="/js/respond.min.js"></script>
+ <![endif]-->
+</head>
+
+
+<body class="wrap">
+ <header role="banner">
+ <nav class="mobile-nav show-on-mobiles">
+ <ul>
+ <li class="">
+ <a href="/">Home</a>
+ </li>
+ <li class="current">
+ <a href="/docs/">Doc<span class="show-on-mobiles">s</span>
+ <span class="hide-on-mobiles">umentation</span></a>
+ </li>
+ <li class="">
+ <a href="/talks/">Talks</a>
+ </li>
+ <li class="">
+ <a href="/news/">News</a>
+ </li>
+ <li class="">
+ <a href="/help/">Help</a>
+ </li>
+ <li class="">
+ <a href="/develop/">Develop</a>
+ </li>
+</ul>
+
+ </nav>
+ <div class="grid">
+ <div class="unit one-third center-on-mobiles">
+ <h1>
+ <a href="/">
+ <span class="sr-only">Apache ORC</span>
+ <img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
+ </a>
+ </h1>
+ </div>
+ <nav class="main-nav unit two-thirds hide-on-mobiles">
+ <ul>
+ <li class="">
+ <a href="/">Home</a>
+ </li>
+ <li class="current">
+ <a href="/docs/">Doc<span class="show-on-mobiles">s</span>
+ <span class="hide-on-mobiles">umentation</span></a>
+ </li>
+ <li class="">
+ <a href="/talks/">Talks</a>
+ </li>
+ <li class="">
+ <a href="/news/">News</a>
+ </li>
+ <li class="">
+ <a href="/help/">Help</a>
+ </li>
+ <li class="">
+ <a href="/develop/">Develop</a>
+ </li>
+</ul>
+
+ </nav>
+ </div>
+</header>
+
+
+ <section class="docs">
+ <div class="grid">
+
+ <div class="docs-nav-mobile unit whole show-on-mobiles">
+ <select onchange="if (this.value) window.location.href=this.value">
+ <option value="">Navigate the docs…</option>
+
+ <optgroup label="Overview">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/index.html">Background</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/adopters.html">ORC Adopters</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/types.html">Types</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/indexes.html">Indexes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/acid.html">ACID support</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Installing">
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/building.html">Building ORC</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/releases.html">Releases</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using in Hive">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/hive-ddl.html">Hive DDL</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using in MapReduce">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/mapred.html">Using in MapRed</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/mapreduce.html">Using in MapReduce</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using ORC Core">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/core-java.html">Using Core Java</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Tools">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/tools.html">Tools</option>
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Format Specification">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/run-length.html">Run Length Encoding</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/stripes.html">Stripes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/encodings.html">Column Encodings</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-index.html">Indexes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ </select>
+</div>
+
+
+ <div class="unit four-fifths">
+ <article>
+ <h1>Using Core Java</h1>
+ <p>The Core ORC API reads and writes ORC files into Hive’s storage-api
+vectorized classes. Both Hive and MapReduce use the Core API to actually
+read and write the data.</p>
+
+<h2 id="vectorized-row-batch">Vectorized Row Batch</h2>
+
+<p>Data is passed to ORC as instances of <code>VectorizedRowBatch</code> that contain
+the data for 1024 rows. The focus is on speed and accessing the data
+fields directly. <code>cols</code> is an array of <code>ColumnVector</code> and <code>size</code> is the number
+of rows.</p>
+
+<pre><code class="language-java">package org.apache.hadoop.hive.ql.exec.vector;
+
+public class VectorizedRowBatch {
+ public ColumnVector[] cols;
+ public int size;
+ ...
+}
+</code></pre>
+
+<p><code>ColumnVector</code> is the parent type of the different kinds of columns
+and has some fields that are shared across all of the column types. In
+particular, the <code>noNulls</code> flag is set if there are no nulls in this column for
+this batch and the <code>isRepeating</code> flag is set for columns where the entire batch is the
+same value. For columns where <code>noNulls == false</code> the <code>isNull</code> array is true
+for each value that is null.</p>
+
+<pre><code class="language-java">public abstract class ColumnVector {
+
+ // If the whole column vector has no nulls, this is true, otherwise false.
+ public boolean noNulls;
+
+ // If noNulls is false, then this array contains true if the value
+ // is null, otherwise false.
+ public boolean[] isNull;
+
+ /*
+ * True if same value repeats for whole column vector.
+ * If so, vector[0] holds the repeating value.
+ */
+ public boolean isRepeating;
+ ...
+}
+</code></pre>
+
+<p>The subtypes of <code>ColumnVector</code> are:</p>
+
+<table>
+ <thead>
+ <tr>
+ <th>ORC Type</th>
+ <th>ColumnVector</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>array</td>
+ <td>ListColumnVector</td>
+ </tr>
+ <tr>
+ <td>binary</td>
+ <td>BytesColumnVector</td>
+ </tr>
+ <tr>
+ <td>bigint</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>boolean</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>char</td>
+ <td>BytesColumnVector</td>
+ </tr>
+ <tr>
+ <td>date</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>decimal</td>
+ <td>DecimalColumnVector</td>
+ </tr>
+ <tr>
+ <td>double</td>
+ <td>DoubleColumnVector</td>
+ </tr>
+ <tr>
+ <td>float</td>
+ <td>DoubleColumnVector</td>
+ </tr>
+ <tr>
+ <td>int</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>map</td>
+ <td>MapColumnVector</td>
+ </tr>
+ <tr>
+ <td>smallint</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>string</td>
+ <td>BytesColumnVector</td>
+ </tr>
+ <tr>
+ <td>struct</td>
+ <td>StructColumnVector</td>
+ </tr>
+ <tr>
+ <td>timestamp</td>
+ <td>TimestampColumnVector</td>
+ </tr>
+ <tr>
+ <td>tinyint</td>
+ <td>LongColumnVector</td>
+ </tr>
+ <tr>
+ <td>uniontype</td>
+ <td>UnionColumnVector</td>
+ </tr>
+ <tr>
+ <td>varchar</td>
+ <td>BytesColumnVector</td>
+ </tr>
+ </tbody>
+</table>
+
+<p><code>LongColumnVector</code> handles all of the integer types (boolean, bigint,
+date, int, smallint, and tinyint). The data is represented as an array of
+longs where each value is sign-extended as necessary.</p>
+
+<pre><code class="language-java">public class LongColumnVector extends ColumnVector {
+ public long[] vector;
+ ...
+}
+</code></pre>
+
+<p><code>TimestampColumnVector</code> handles timestamp values. The data is represented
+as an array of longs and an array of ints.</p>
+
+<pre><code class="language-java">public class TimestampColumnVector extends ColumnVector {
+
+ // the number of milliseconds since 1 Jan 1970 00:00 GMT
+ public long[] time;
+
+ // the number of nanoseconds within the second
+ public int[] nanos;
+ ...
+}
+</code></pre>
+
+<p><code>DoubleColumnVector</code> handles all of the floating point types (double,
+and float). The data is represented as an array of doubles.</p>
+
+<pre><code class="language-java">public class DoubleColumnVector extends ColumnVector {
+ public double[] vector;
+ ...
+}
+</code></pre>
+
+<p><code>DecimalColumnVector</code> handles decimal columns. The data is represented
+as an array of HiveDecimalWritable. Note that this implementation is not
+performant and will likely be replaced.</p>
+
+<pre><code class="language-java">public class DecimalColumnVector extends ColumnVector {
+ public HiveDecimalWritable[] vector;
+ ...
+}
+</code></pre>
+
+<p><code>BytesColumnVector</code> handles all of the binary types (binary, char,
+string, and varchar). The data is represented as a byte array, offset,
+and length. The byte arrays may or may not be shared between values.</p>
+
+<pre><code class="language-java">public class BytesColumnVector extends ColumnVector {
+ public byte[][] vector;
+ public int[] start;
+ public int[] length;
+ ...
+}
+</code></pre>
+
+<p><code>StructColumnVector</code> handles the struct columns and represents the data as an
+array of <code>ColumnVector</code>. The value for row 5 consists of the fifth value from
+each of the <code>fields</code> values.</p>
+
+<pre><code class="language-java">public class StructColumnVector extends ColumnVector {
+ public ColumnVector[] fields;
+ ...
+}
+</code></pre>
+
+<p><code>UnionColumnVector</code> handles the union columns and represents the data
+as an array of integers that pick the subtype and a <code>fields</code> array with one
+entry per subtype. Only the value of the <code>fields</code> that corresponds to
+<code>tags[row]</code> is set.</p>
+
+<pre><code class="language-java">public class UnionColumnVector extends ColumnVector {
+ public int[] tags;
+ public ColumnVector[] fields;
+ ...
+}
+</code></pre>
+
+<p><code>ListColumnVector</code> handles the array columns and represents the data
+as two arrays of integers for the offset and lengths and a
+<code>ColumnVector</code> for the children values.</p>
+
+<pre><code class="language-java">public class ListColumnVector extends ColumnVector {
+ // for each row, the first offset of the child
+ public long[] offsets;
+ // for each row, the number of elements in the array
+ public long[] lengths;
+ // the offset in the child that should be used for new values
+ public int childCount;
+
+ // the values of the children
+ public ColumnVector child;
+ ...
+}
+</code></pre>
+
+<p><code>MapColumnVector</code> handles the map columns and represents the data
+as two arrays of integers for the offset and lengths and two
+<code>ColumnVector</code>s for the keys and values.</p>
+
+<pre><code class="language-java">public class MapColumnVector extends ColumnVector {
+ // for each row, the first offset of the child
+ public long[] offsets;
+ // for each row, the number of elements in the map
+ public long[] lengths;
+ // the offset in the child that should be used for new values
+ public int childCount;
+
+ // the values of the keys and values
+ public ColumnVector keys;
+ public ColumnVector values;
+ ...
+}
+</code></pre>
+
+<h2 id="writing-orc-files">Writing ORC Files</h2>
+
+<p>To write an ORC file, you need to define the schema and create a <code>Writer</code>
+with the desired filename. This example sets the required schema parameter,
+but there are many other options to control the ORC writer.</p>
+
+<pre><code class="language-java">TypeDescription schema = TypeDescription.fromString("struct&lt;x:int,y:int&gt;");
+Writer writer = OrcFile.createWriter(new Path("my-file.orc"),
+ OrcFile.writerOptions(conf)
+ .schema(schema));
+</code></pre>
+
+<p>Now you need to create a row batch, set the data, and write it to the file
+as the batch fills up. When the file is done, close the <code>Writer</code>.</p>
+
+<pre><code class="language-java">VectorizedRowBatch batch = schema.createRowBatch();
+LongColumnVector x = (LongColumnVector) batch.cols[0];
+LongColumnVector y = (LongColumnVector) batch.cols[1];
+for(int r=0; r &lt; 10000; ++r) {
+ int row = batch.size++;
+ x.vector[row] = r;
+ y.vector[row] = r * 3;
+ // If the batch is full, or this is the last row, write it out and start over.
+ if (batch.size == batch.getMaxSize() || r == 9999) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+}
+writer.close();
+</code></pre>
+
+<h2 id="reading-orc-files">Reading ORC Files</h2>
+
+<p>To read ORC files, create a <code>Reader</code> that contains the metadata about
+the file. There are a few options to the ORC reader, but far fewer than
+the writer and none of them are required. The reader has methods for
+getting the number of rows, schema, compression, etc. from the file.</p>
+
+<pre><code class="language-java">Reader reader = OrcFile.createReader(new Path("my-file.orc"),
+ OrcFile.readerOptions(conf));
+</code></pre>
+
+<p>To get the data, create a <code>RecordReader</code> object. By default, the
+RecordReader reads all rows and all columns, but there are options to
+control the data that is read.</p>
+
+<pre><code class="language-java">RecordReader rows = reader.rows();
+VectorizedRowBatch batch = reader.getSchema().createRowBatch();
+</code></pre>
+
+<p>With a <code>RecordReader</code> the user can ask for the next batch until there
+are no more left. The reader will stop the batch at certain boundaries, so the
+returned batch may not be full, but it will always contain some rows.</p>
+
+<pre><code class="language-java">while (rows.nextBatch(batch)) {
+ for(int r=0; r &lt; batch.size; ++r) {
+ ... process row r from batch
+ }
+}
+rows.close();
+</code></pre>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <div class="section-nav">
+ <div class="left align-right">
+
+
+
+ <a href="/docs/mapreduce.html" class="prev">Back</a>
+
+ </div>
+ <div class="right align-left">
+
+
+
+ <a href="/docs/tools.html" class="next">Next</a>
+
+ </div>
+ </div>
+ <div class="clear"></div>
+
+
+ </article>
+ </div>
+
+ <div class="unit one-fifth hide-on-mobiles">
+ <aside>
+
+ <h4>Overview</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/types.html">Types</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+
+
+</ul>
+
+
+ <h4>Installing</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/building.html">Building ORC</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/releases.html">Releases</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using in Hive</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using in MapReduce</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using ORC Core</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class="current"><a href="/docs/core-java.html">Using Core Java</a></li>
+
+
+
+</ul>
+
+
+ <h4>Tools</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/tools.html">Tools</a></li>
+
+
+
+</ul>
+
+
+ <h4>Format Specification</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/compression.html">Compression</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+
+
+
+</ul>
+
+
+ </aside>
+</div>
+
+
+ <div class="clear"></div>
+
+ </div>
+ </section>
+
+
+ <footer role="contentinfo">
+ <p>The contents of this website are © 2016
+ <a href="https://www.apache.org/">Apache Software Foundation</a>
+ under the terms of the <a
+ href="https://www.apache.org/licenses/LICENSE-2.0.html">
+ Apache License v2</a>. Apache ORC and its logo are trademarks
+ of the Apache Software Foundation.</p>
+</footer>
+
+ <script>
+ var anchorForId = function (id) {
+ var anchor = document.createElement("a");
+ anchor.className = "header-link";
+ anchor.href = "#" + id;
+ anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>";
+ anchor.title = "Permalink";
+ return anchor;
+ };
+
+ var linkifyAnchors = function (level, containingElement) {
+ var headers = containingElement.getElementsByTagName("h" + level);
+ for (var h = 0; h < headers.length; h++) {
+ var header = headers[h];
+
+ if (typeof header.id !== "undefined" && header.id !== "") {
+ header.appendChild(anchorForId(header.id));
+ }
+ }
+ };
+
+ document.onreadystatechange = function () {
+ if (this.readyState === "complete") {
+ var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0];
+ if (!contentBlock) {
+ return;
+ }
+ for (var level = 1; level <= 6; level++) {
+ linkifyAnchors(level, contentBlock);
+ }
+ }
+ };
+</script>
+
+
+</body>
+</html>
http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/encodings.html
----------------------------------------------------------------------
diff --git a/docs/encodings.html b/docs/encodings.html
index 7e66102..426b380 100644
--- a/docs/encodings.html
+++ b/docs/encodings.html
@@ -105,6 +105,12 @@
+
+
+
+
+
+
<option value="/docs/index.html">Background</option>
@@ -120,6 +126,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/adopters.html">ORC Adopters</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -149,6 +209,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
<option value="/docs/types.html">Types</option>
@@ -171,6 +245,12 @@
+
+
+
+
+
+
<option value="/docs/indexes.html">Indexes</option>
@@ -184,6 +264,14 @@
+
+
+
+
+
+
+
+
@@ -216,11 +304,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
</optgroup>
- <optgroup label="Hive Usage">
+ <optgroup label="Installing">
@@ -232,13 +334,27 @@
+ <option value="/docs/building.html">Building ORC</option>
+
- <option value="/docs/hive-ddl.html">Hive DDL</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -268,7 +384,17 @@
- <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
+
+
+
+
+
+
+
+
+
@@ -276,6 +402,10 @@
+ <option value="/docs/releases.html">Releases</option>
+
+
+
@@ -291,7 +421,7 @@
</optgroup>
- <optgroup label="Format Specification">
+ <optgroup label="Using in Hive">
@@ -315,11 +445,25 @@
+ <option value="/docs/hive-ddl.html">Hive DDL</option>
+
- <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -337,7 +481,17 @@
- <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+ <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
@@ -358,6 +512,17 @@
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using in MapReduce">
+
+
@@ -365,7 +530,11 @@
- <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
@@ -381,6 +550,16 @@
+ <option value="/docs/mapred.html">Using in MapRed</option>
+
+
+
+
+
+
+
+
+
@@ -411,7 +590,7 @@
- <option value="/docs/run-length.html">Run Length Encoding</option>
+
@@ -419,12 +598,14 @@
+ <option value="/docs/mapreduce.html">Using in MapReduce</option>
+
-
+
-
+
@@ -434,7 +615,16 @@
+
+
+ </optgroup>
+ <optgroup label="Using ORC Core">
+
+
+
+
+
@@ -445,25 +635,27 @@
+ <option value="/docs/core-java.html">Using Core Java</option>
+
- <option value="/docs/stripes.html">Stripes</option>
+
-
+
-
+
- <option value="/docs/encodings.html">Column Encodings</option>
+
@@ -476,7 +668,16 @@
+
+
+ </optgroup>
+ <optgroup label="Tools">
+
+
+
+
+
@@ -486,9 +687,9 @@
-
+
-
+
@@ -509,12 +710,14 @@
- <option value="/docs/spec-index.html">Indexes</option>
+
+ <option value="/docs/tools.html">Tools</option>
+
@@ -522,38 +725,367 @@
</optgroup>
- </select>
-</div>
-
+ <optgroup label="Format Specification">
+
- <div class="unit four-fifths">
- <article>
- <h1>Column Encodings</h1>
- <h2 id="smallint-int-and-bigint-columns">SmallInt, Int, and BigInt Columns</h2>
-<p>All of the 16, 32, and 64 bit integer column types use the same set of
-potential encodings, which is basically whether they use RLE v1 or
-v2. If the PRESENT stream is not included, all of the values are
-present. For values that have false bits in the present stream, no
-values are included in the data stream.</p>
+
-<table>
- <thead>
- <tr>
- <th style="text-align: left">Encoding</th>
- <th style="text-align: left">Stream Kind</th>
- <th style="text-align: left">Optional</th>
- <th style="text-align: left">Contents</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td style="text-align: left">DIRECT</td>
- <td style="text-align: left">PRESENT</td>
- <td style="text-align: left">Yes</td>
- <td style="text-align: left">Boolean RLE</td>
- </tr>
- <tr>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/run-length.html">Run Length Encoding</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/stripes.html">Stripes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/encodings.html">Column Encodings</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-index.html">Indexes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ </select>
+</div>
+
+
+ <div class="unit four-fifths">
+ <article>
+ <h1>Column Encodings</h1>
+ <h2 id="smallint-int-and-bigint-columns">SmallInt, Int, and BigInt Columns</h2>
+
+<p>All of the 16, 32, and 64 bit integer column types use the same set of
+potential encodings, which is basically whether they use RLE v1 or
+v2. If the PRESENT stream is not included, all of the values are
+present. For values that have false bits in the present stream, no
+values are included in the data stream.</p>
+
+<table>
+ <thead>
+ <tr>
+ <th style="text-align: left">Encoding</th>
+ <th style="text-align: left">Stream Kind</th>
+ <th style="text-align: left">Optional</th>
+ <th style="text-align: left">Contents</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td style="text-align: left">DIRECT</td>
+ <td style="text-align: left">PRESENT</td>
+ <td style="text-align: left">Yes</td>
+ <td style="text-align: left">Boolean RLE</td>
+ </tr>
+ <tr>
<td style="text-align: left">&nbsp;</td>
<td style="text-align: left">DATA</td>
<td style="text-align: left">No</td>
@@ -1110,105 +1642,334 @@ another child column for the value.</p>
</tbody>
</table>
-<h2 id="union-columns">Union Columns</h2>
+<h2 id="union-columns">Union Columns</h2>
+
+<p>Unions are encoded as the PRESENT stream and a tag stream that controls which
+potential variant is used. They have a child column for each variant of the
+union. Currently ORC union types are limited to 256 variants, which matches
+the Hive type model.</p>
+
+<table>
+ <thead>
+ <tr>
+ <th style="text-align: left">Encoding</th>
+ <th style="text-align: left">Stream Kind</th>
+ <th style="text-align: left">Optional</th>
+ <th style="text-align: left">Contents</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td style="text-align: left">DIRECT</td>
+ <td style="text-align: left">PRESENT</td>
+ <td style="text-align: left">Yes</td>
+ <td style="text-align: left">Boolean RLE</td>
+ </tr>
+ <tr>
+ <td style="text-align: left">&nbsp;</td>
+ <td style="text-align: left">DIRECT</td>
+ <td style="text-align: left">No</td>
+ <td style="text-align: left">Byte RLE</td>
+ </tr>
+ </tbody>
+</table>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <div class="section-nav">
+ <div class="left align-right">
+
+
+
+ <a href="/docs/stripes.html" class="prev">Back</a>
+
+ </div>
+ <div class="right align-left">
+
+
+
+ <a href="/docs/spec-index.html" class="next">Next</a>
+
+ </div>
+ </div>
+ <div class="clear"></div>
+
+
+ </article>
+ </div>
+
+ <div class="unit one-fifth hide-on-mobiles">
+ <aside>
+
+ <h4>Overview</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/types.html">Types</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
-<p>Unions are encoded as the PRESENT stream and a tag stream that controls which
-potential variant is used. They have a child column for each variant of the
-union. Currently ORC union types are limited to 256 variants, which matches
-the Hive type model.</p>
+
-<table>
- <thead>
- <tr>
- <th style="text-align: left">Encoding</th>
- <th style="text-align: left">Stream Kind</th>
- <th style="text-align: left">Optional</th>
- <th style="text-align: left">Contents</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td style="text-align: left">DIRECT</td>
- <td style="text-align: left">PRESENT</td>
- <td style="text-align: left">Yes</td>
- <td style="text-align: left">Boolean RLE</td>
- </tr>
- <tr>
- <td style="text-align: left">&nbsp;</td>
- <td style="text-align: left">DIRECT</td>
- <td style="text-align: left">No</td>
- <td style="text-align: left">Byte RLE</td>
- </tr>
- </tbody>
-</table>
+
+
+
-
+
+
+ <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+</ul>
+
+ <h4>Installing</h4>
+
+<ul>
-
+
+
-
+
+
+ <li class=""><a href="/docs/building.html">Building ORC</a></li>
+
+
+
+
+
-
+
+
-
+
+
-
+
+
-
+
+
-
+
+
-
+
+
- <div class="section-nav">
- <div class="left align-right">
-
-
-
- <a href="/docs/stripes.html" class="prev">Back</a>
-
- </div>
- <div class="right align-left">
-
-
-
- <a href="/docs/spec-index.html" class="next">Next</a>
-
- </div>
- </div>
- <div class="clear"></div>
+ <li class=""><a href="/docs/releases.html">Releases</a></li>
+
- </article>
- </div>
- <div class="unit one-fifth hide-on-mobiles">
- <aside>
+</ul>
+
- <h4>Overview</h4>
+ <h4>Using in Hive</h4>
<ul>
@@ -1233,7 +1994,11 @@ the Hive type model.</p>
- <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+ <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
@@ -1259,6 +2024,38 @@ the Hive type model.</p>
+ <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using in MapReduce</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1269,7 +2066,7 @@ the Hive type model.</p>
- <li class=""><a href="/docs/types.html">Types</a></li>
+ <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
@@ -1295,10 +2092,28 @@ the Hive type model.</p>
- <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+</ul>
+
+
+ <h4>Using ORC Core</h4>
+
+
+<ul>
+
@@ -1307,14 +2122,22 @@ the Hive type model.</p>
- <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
</ul>
- <h4>Hive Usage</h4>
+ <h4>Tools</h4>
<ul>
@@ -1337,16 +2160,12 @@ the Hive type model.</p>
- <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
-
-
-
-
+
-
+
@@ -1357,7 +2176,17 @@ the Hive type model.</p>
- <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/tools.html">Tools</a></li>
@@ -1397,6 +2226,18 @@ the Hive type model.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
@@ -1415,6 +2256,12 @@ the Hive type model.</p>
+
+
+
+
+
+
<li class=""><a href="/docs/file-tail.html">File Tail</a></li>
@@ -1429,6 +2276,10 @@ the Hive type model.</p>
+
+
+
+
<li class=""><a href="/docs/compression.html">Compression</a></li>
@@ -1457,6 +2308,18 @@ the Hive type model.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
@@ -1491,6 +2354,18 @@ the Hive type model.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/stripes.html">Stripes</a></li>
@@ -1507,6 +2382,12 @@ the Hive type model.</p>
+
+
+
+
+
+
<li class="current"><a href="/docs/encodings.html">Column Encodings</a></li>
@@ -1537,6 +2418,18 @@ the Hive type model.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-index.html">Indexes</a></li>
http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/file-tail.html
----------------------------------------------------------------------
diff --git a/docs/file-tail.html b/docs/file-tail.html
index 8a2e761..7caa49e 100644
--- a/docs/file-tail.html
+++ b/docs/file-tail.html
@@ -105,6 +105,12 @@
+
+
+
+
+
+
<option value="/docs/index.html">Background</option>
@@ -120,6 +126,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/adopters.html">ORC Adopters</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -149,6 +209,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
<option value="/docs/types.html">Types</option>
@@ -171,6 +245,12 @@
+
+
+
+
+
+
<option value="/docs/indexes.html">Indexes</option>
@@ -184,6 +264,14 @@
+
+
+
+
+
+
+
+
@@ -216,11 +304,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
</optgroup>
- <optgroup label="Hive Usage">
+ <optgroup label="Installing">
@@ -232,13 +334,27 @@
+ <option value="/docs/building.html">Building ORC</option>
+
- <option value="/docs/hive-ddl.html">Hive DDL</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -268,7 +384,17 @@
- <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
+
+
+
+
+
+
+
+
+
@@ -276,6 +402,10 @@
+ <option value="/docs/releases.html">Releases</option>
+
+
+
@@ -291,7 +421,7 @@
</optgroup>
- <optgroup label="Format Specification">
+ <optgroup label="Using in Hive">
@@ -315,11 +445,25 @@
+ <option value="/docs/hive-ddl.html">Hive DDL</option>
+
- <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -337,7 +481,17 @@
- <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+ <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
@@ -358,6 +512,17 @@
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using in MapReduce">
+
+
@@ -365,7 +530,11 @@
- <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
@@ -381,6 +550,16 @@
+ <option value="/docs/mapred.html">Using in MapRed</option>
+
+
+
+
+
+
+
+
+
@@ -411,7 +590,7 @@
- <option value="/docs/run-length.html">Run Length Encoding</option>
+
@@ -419,12 +598,14 @@
+ <option value="/docs/mapreduce.html">Using in MapReduce</option>
+
-
+
-
+
@@ -434,7 +615,16 @@
+
+
+ </optgroup>
+ <optgroup label="Using ORC Core">
+
+
+
+
+
@@ -445,25 +635,27 @@
+ <option value="/docs/core-java.html">Using Core Java</option>
+
- <option value="/docs/stripes.html">Stripes</option>
+
-
+
-
+
- <option value="/docs/encodings.html">Column Encodings</option>
+
@@ -476,7 +668,16 @@
+
+
+ </optgroup>
+ <optgroup label="Tools">
+
+
+
+
+
@@ -486,9 +687,9 @@
-
+
-
+
@@ -509,12 +710,14 @@
- <option value="/docs/spec-index.html">Indexes</option>
+
+ <option value="/docs/tools.html">Tools</option>
+
@@ -522,38 +725,367 @@
</optgroup>
- </select>
-</div>
-
-
- <div class="unit four-fifths">
- <article>
- <h1>File Tail</h1>
- <p>Since HDFS does not support changing the data in a file after it is
-written, ORC stores the top level index at the end of the file. The
-overall structure of the file is given in the figure above. The
-file’s tail consists of 3 parts; the file metadata, file footer and
-postscript.</p>
+ <optgroup label="Format Specification">
+
-<p>The metadata for ORC is stored using
-<a href="http://s.apache.org/protobuf_encoding">Protocol Buffers</a>, which provides
-the ability to add new fields without breaking readers. This document
-incorporates the Protobuf definition from the
-<a href="http://s.apache.org/orc_proto">ORC source code</a> and the
-reader is encouraged to review the Protobuf encoding if they need to
-understand the byte-level encoding</p>
-<h1 id="postscript">Postscript</h1>
+
-<p>The Postscript section provides the necessary information to interpret
-the rest of the file including the length of the file’s Footer and
-Metadata sections, the version of the file, and the kind of general
-compression used (eg. none, zlib, or snappy). The Postscript is never
-compressed and ends one byte before the end of the file. The version
-stored in the Postscript is the lowest version of Hive that is
-guaranteed to be able to read the file and it stored as a sequence of
-the major and minor version. There are currently two versions that are
-used: [0,11] for Hive 0.11, and [0,12] for Hive 0.12 or later.</p>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/run-length.html">Run Length Encoding</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/stripes.html">Stripes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/encodings.html">Column Encodings</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-index.html">Indexes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ </select>
+</div>
+
+
+ <div class="unit four-fifths">
+ <article>
+ <h1>File Tail</h1>
+ <p>Since HDFS does not support changing the data in a file after it is
+written, ORC stores the top level index at the end of the file. The
+overall structure of the file is given in the figure above. The
+file’s tail consists of 3 parts; the file metadata, file footer and
+postscript.</p>
+
+<p>The metadata for ORC is stored using
+<a href="http://s.apache.org/protobuf_encoding">Protocol Buffers</a>, which provides
+the ability to add new fields without breaking readers. This document
+incorporates the Protobuf definition from the
+<a href="http://s.apache.org/orc_proto">ORC source code</a> and the
+reader is encouraged to review the Protobuf encoding if they need to
+understand the byte-level encoding</p>
+
+<h1 id="postscript">Postscript</h1>
+
+<p>The Postscript section provides the necessary information to interpret
+the rest of the file including the length of the file’s Footer and
+Metadata sections, the version of the file, and the kind of general
+compression used (eg. none, zlib, or snappy). The Postscript is never
+compressed and ends one byte before the end of the file. The version
+stored in the Postscript is the lowest version of Hive that is
+guaranteed to be able to read the file and it stored as a sequence of
+the major and minor version. There are currently two versions that are
+used: [0,11] for Hive 0.11, and [0,12] for Hive 0.12 or later.</p>
<p>The process of reading an ORC file works backwards through the
file. Rather than making multiple short reads, the ORC reader reads
@@ -805,100 +1337,329 @@ milliseconds since the epoch (1/1/2015).</p>
<p>Binary columns store the aggregate number of bytes across all of the values.</p>
-<p><code>message BinaryStatistics {
- // sum will store the total binary blob length
- optional sint64 sum = 1;
-}
-</code></p>
+<p><code>message BinaryStatistics {
+ // sum will store the total binary blob length
+ optional sint64 sum = 1;
+}
+</code></p>
+
+<h2 id="user-metadata">User Metadata</h2>
+
+<p>The user can add arbitrary key/value pairs to an ORC file as it is
+written. The contents of the keys and values are completely
+application defined, but the key is a string and the value is
+binary. Care should be taken by applications to make sure that their
+keys are unique and in general should be prefixed with an organization
+code.</p>
+
+<p><code>message UserMetadataItem {
+ // the user defined key
+ required string name = 1;
+ // the user defined binary value
+ required bytes value = 2;
+}
+</code></p>
+
+<h2 id="file-metadata">File Metadata</h2>
+
+<p>The file Metadata section contains column statistics at the stripe
+level granularity. These statistics enable input split elimination
+based on the predicate push-down evaluated per a stripe.</p>
+
+<p><code>message StripeStatistics {
+ repeated ColumnStatistics colStats = 1;
+}
+</code></p>
+
+<p><code>message Metadata {
+ repeated StripeStatistics stripeStats = 1;
+}
+</code></p>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <div class="section-nav">
+ <div class="left align-right">
+
+
+
+ <a href="/docs/spec-intro.html" class="prev">Back</a>
+
+ </div>
+ <div class="right align-left">
+
+
+
+ <a href="/docs/compression.html" class="next">Next</a>
+
+ </div>
+ </div>
+ <div class="clear"></div>
+
+
+ </article>
+ </div>
+
+ <div class="unit one-fifth hide-on-mobiles">
+ <aside>
+
+ <h4>Overview</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/types.html">Types</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
-<h2 id="user-metadata">User Metadata</h2>
+
-<p>The user can add arbitrary key/value pairs to an ORC file as it is
-written. The contents of the keys and values are completely
-application defined, but the key is a string and the value is
-binary. Care should be taken by applications to make sure that their
-keys are unique and in general should be prefixed with an organization
-code.</p>
+
+
+
-<p><code>message UserMetadataItem {
- // the user defined key
- required string name = 1;
- // the user defined binary value
- required bytes value = 2;
-}
-</code></p>
+
+
+ <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
-<h2 id="file-metadata">File Metadata</h2>
-<p>The file Metadata section contains column statistics at the stripe
-level granularity. These statistics enable input split elimination
-based on the predicate push-down evaluated per a stripe.</p>
+</ul>
-<p><code>message StripeStatistics {
- repeated ColumnStatistics colStats = 1;
-}
-</code></p>
+
+ <h4>Installing</h4>
+
-<p><code>message Metadata {
- repeated StripeStatistics stripeStats = 1;
-}
-</code></p>
+<ul>
-
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/building.html">Building ORC</a></li>
+
+
+
+
-
+
+
-
+
+
-
+
+
-
+
+
-
+
+
-
+
+
- <div class="section-nav">
- <div class="left align-right">
-
-
-
- <a href="/docs/spec-intro.html" class="prev">Back</a>
-
- </div>
- <div class="right align-left">
-
-
-
- <a href="/docs/compression.html" class="next">Next</a>
-
- </div>
- </div>
- <div class="clear"></div>
+ <li class=""><a href="/docs/releases.html">Releases</a></li>
+
- </article>
- </div>
- <div class="unit one-fifth hide-on-mobiles">
- <aside>
+</ul>
+
- <h4>Overview</h4>
+ <h4>Using in Hive</h4>
<ul>
@@ -923,7 +1684,11 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+ <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
@@ -949,6 +1714,38 @@ based on the predicate push-down evaluated per a stripe.</p>
+ <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using in MapReduce</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -959,7 +1756,7 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/types.html">Types</a></li>
+ <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
@@ -985,10 +1782,28 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+</ul>
+
+
+ <h4>Using ORC Core</h4>
+
+
+<ul>
+
@@ -997,14 +1812,22 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
</ul>
- <h4>Hive Usage</h4>
+ <h4>Tools</h4>
<ul>
@@ -1027,16 +1850,12 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
-
-
-
-
+
-
+
@@ -1047,7 +1866,17 @@ based on the predicate push-down evaluated per a stripe.</p>
- <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/tools.html">Tools</a></li>
@@ -1087,6 +1916,18 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
@@ -1105,6 +1946,12 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
<li class="current"><a href="/docs/file-tail.html">File Tail</a></li>
@@ -1119,6 +1966,10 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
<li class=""><a href="/docs/compression.html">Compression</a></li>
@@ -1147,6 +1998,18 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
@@ -1181,6 +2044,18 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/stripes.html">Stripes</a></li>
@@ -1197,6 +2072,12 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
<li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
@@ -1227,6 +2108,18 @@ based on the predicate push-down evaluated per a stripe.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-index.html">Indexes</a></li>
http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/hive-config.html
----------------------------------------------------------------------
diff --git a/docs/hive-config.html b/docs/hive-config.html
index 4179348..7f4dd83 100644
--- a/docs/hive-config.html
+++ b/docs/hive-config.html
@@ -105,6 +105,12 @@
+
+
+
+
+
+
<option value="/docs/index.html">Background</option>
@@ -120,6 +126,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/adopters.html">ORC Adopters</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -149,6 +209,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
<option value="/docs/types.html">Types</option>
@@ -171,6 +245,12 @@
+
+
+
+
+
+
<option value="/docs/indexes.html">Indexes</option>
@@ -184,6 +264,14 @@
+
+
+
+
+
+
+
+
@@ -216,11 +304,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
</optgroup>
- <optgroup label="Hive Usage">
+ <optgroup label="Installing">
@@ -232,13 +334,27 @@
+ <option value="/docs/building.html">Building ORC</option>
+
- <option value="/docs/hive-ddl.html">Hive DDL</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -268,7 +384,17 @@
- <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
+
+
+
+
+
+
+
+
+
@@ -276,6 +402,10 @@
+ <option value="/docs/releases.html">Releases</option>
+
+
+
@@ -291,7 +421,7 @@
</optgroup>
- <optgroup label="Format Specification">
+ <optgroup label="Using in Hive">
@@ -315,11 +445,25 @@
+ <option value="/docs/hive-ddl.html">Hive DDL</option>
+
- <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -337,7 +481,17 @@
- <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+ <option value="/docs/hive-config.html">Hive Configuration</option>
+
+
@@ -358,6 +512,17 @@
+
+
+
+
+
+
+ </optgroup>
+
+ <optgroup label="Using in MapReduce">
+
+
@@ -365,7 +530,11 @@
- <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
@@ -381,6 +550,16 @@
+ <option value="/docs/mapred.html">Using in MapRed</option>
+
+
+
+
+
+
+
+
+
@@ -411,7 +590,7 @@
- <option value="/docs/run-length.html">Run Length Encoding</option>
+
@@ -419,12 +598,14 @@
+ <option value="/docs/mapreduce.html">Using in MapReduce</option>
+
-
+
-
+
@@ -434,7 +615,16 @@
+
+
+ </optgroup>
+ <optgroup label="Using ORC Core">
+
+
+
+
+
@@ -445,25 +635,27 @@
+ <option value="/docs/core-java.html">Using Core Java</option>
+
- <option value="/docs/stripes.html">Stripes</option>
+
-
+
-
+
- <option value="/docs/encodings.html">Column Encodings</option>
+
@@ -476,7 +668,16 @@
+
+
+ </optgroup>
+ <optgroup label="Tools">
+
+
+
+
+
@@ -486,9 +687,9 @@
-
+
-
+
@@ -509,12 +710,14 @@
- <option value="/docs/spec-index.html">Indexes</option>
+
+ <option value="/docs/tools.html">Tools</option>
+
@@ -522,38 +725,367 @@
</optgroup>
- </select>
-</div>
-
+ <optgroup label="Format Specification">
+
- <div class="unit four-fifths">
- <article>
- <h1>Hive Configuration</h1>
- <h2 id="table-properties">Table properties</h2>
-<p>Tables stored as ORC files use table properties to control their behavior. By
-using table properties, the table owner ensures that all clients store data
-with the same options.</p>
+
-<table>
- <thead>
- <tr>
- <th style="text-align: left">Key</th>
- <th style="text-align: left">Default</th>
- <th style="text-align: left">Notes</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td style="text-align: left">orc.compress</td>
- <td style="text-align: left">ZLIB</td>
- <td style="text-align: left">high level compression = {NONE, ZLIB, SNAPPY}</td>
- </tr>
- <tr>
- <td style="text-align: left">orc.compress.size</td>
- <td style="text-align: left">262,144</td>
- <td style="text-align: left">compression chunk size</td>
- </tr>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-intro.html">Introduction</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/file-tail.html">File Tail</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/compression.html">Compression</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/run-length.html">Run Length Encoding</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/stripes.html">Stripes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/encodings.html">Column Encodings</option>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <option value="/docs/spec-index.html">Indexes</option>
+
+
+
+
+
+
+
+
+
+
+
+
+ </optgroup>
+
+ </select>
+</div>
+
+
+ <div class="unit four-fifths">
+ <article>
+ <h1>Hive Configuration</h1>
+ <h2 id="table-properties">Table properties</h2>
+
+<p>Tables stored as ORC files use table properties to control their behavior. By
+using table properties, the table owner ensures that all clients store data
+with the same options.</p>
+
+<table>
+ <thead>
+ <tr>
+ <th style="text-align: left">Key</th>
+ <th style="text-align: left">Default</th>
+ <th style="text-align: left">Notes</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td style="text-align: left">orc.compress</td>
+ <td style="text-align: left">ZLIB</td>
+ <td style="text-align: left">high level compression = {NONE, ZLIB, SNAPPY}</td>
+ </tr>
+ <tr>
+ <td style="text-align: left">orc.compress.size</td>
+ <td style="text-align: left">262,144</td>
+ <td style="text-align: left">compression chunk size</td>
+ </tr>
<tr>
<td style="text-align: left">orc.stripe.size</td>
<td style="text-align: left">268,435,456</td>
@@ -742,57 +1274,274 @@ with the same options.</p>
SPEED or COMPRESSION.</td>
</tr>
-</td></tr></table>
+</td></tr></table>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <div class="section-nav">
+ <div class="left align-right">
+
+
+
+ <a href="/docs/hive-ddl.html" class="prev">Back</a>
+
+ </div>
+ <div class="right align-left">
+
+
+
+ <a href="/docs/mapred.html" class="next">Next</a>
+
+ </div>
+ </div>
+ <div class="clear"></div>
+
+
+ </article>
+ </div>
+
+ <div class="unit one-fifth hide-on-mobiles">
+ <aside>
+
+ <h4>Overview</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/types.html">Types</a></li>
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
+
+
+ <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+
+
+</ul>
+
+
+ <h4>Installing</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/building.html">Building ORC</a></li>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
- <div class="section-nav">
- <div class="left align-right">
-
-
-
- <a href="/docs/hive-ddl.html" class="prev">Back</a>
-
- </div>
- <div class="right align-left">
-
-
-
- <a href="/docs/spec-intro.html" class="next">Next</a>
-
- </div>
- </div>
- <div class="clear"></div>
+
+
+
+
+ <li class=""><a href="/docs/releases.html">Releases</a></li>
+
- </article>
- </div>
- <div class="unit one-fifth hide-on-mobiles">
- <aside>
+</ul>
+
- <h4>Overview</h4>
+ <h4>Using in Hive</h4>
<ul>
@@ -817,7 +1566,11 @@ with the same options.</p>
- <li class=""><a href="/docs/index.html">Background</a></li>
+
+
+
+
+ <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
@@ -843,6 +1596,38 @@ with the same options.</p>
+ <li class="current"><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+</ul>
+
+
+ <h4>Using in MapReduce</h4>
+
+
+<ul>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -853,7 +1638,7 @@ with the same options.</p>
- <li class=""><a href="/docs/types.html">Types</a></li>
+ <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
@@ -879,10 +1664,28 @@ with the same options.</p>
- <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+</ul>
+
+
+ <h4>Using ORC Core</h4>
+
+
+<ul>
+
@@ -891,14 +1694,22 @@ with the same options.</p>
- <li class=""><a href="/docs/acid.html">ACID support</a></li>
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
</ul>
- <h4>Hive Usage</h4>
+ <h4>Tools</h4>
<ul>
@@ -921,16 +1732,12 @@ with the same options.</p>
- <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
-
-
-
-
+
-
+
@@ -941,7 +1748,17 @@ with the same options.</p>
- <li class="current"><a href="/docs/hive-config.html">Hive Configuration</a></li>
+
+
+
+
+
+
+
+
+
+
+ <li class=""><a href="/docs/tools.html">Tools</a></li>
@@ -981,6 +1798,18 @@ with the same options.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
@@ -999,6 +1828,12 @@ with the same options.</p>
+
+
+
+
+
+
<li class=""><a href="/docs/file-tail.html">File Tail</a></li>
@@ -1013,6 +1848,10 @@ with the same options.</p>
+
+
+
+
<li class=""><a href="/docs/compression.html">Compression</a></li>
@@ -1041,6 +1880,18 @@ with the same options.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
@@ -1075,6 +1926,18 @@ with the same options.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/stripes.html">Stripes</a></li>
@@ -1091,6 +1954,12 @@ with the same options.</p>
+
+
+
+
+
+
<li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
@@ -1121,6 +1990,18 @@ with the same options.</p>
+
+
+
+
+
+
+
+
+
+
+
+
<li class=""><a href="/docs/spec-index.html">Indexes</a></li>