You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2018/04/17 17:49:55 UTC

[8/9] orc git commit: Pushing ORC-339 reorganize the ORC file format spec.

http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/core-cpp.html
----------------------------------------------------------------------
diff --git a/docs/core-cpp.html b/docs/core-cpp.html
index 130d019..ec31d6f 100644
--- a/docs/core-cpp.html
+++ b/docs/core-cpp.html
@@ -109,12 +109,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/index.html">Background</option>
     
   
@@ -130,14 +124,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -174,20 +160,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -221,20 +193,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
       <option value="/docs/types.html">Types</option>
     
   
@@ -261,12 +219,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/indexes.html">Indexes</option>
     
   
@@ -280,14 +232,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -324,20 +268,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -381,20 +311,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -426,25 +342,11 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/releases.html">Releases</option>
     
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -471,12 +373,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-ddl.html">Hive DDL</option>
     
   
@@ -494,14 +390,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -519,12 +407,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-config.html">Hive Configuration</option>
     
   
@@ -544,14 +426,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -586,12 +460,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapred.html">Using in MapRed</option>
     
   
@@ -601,14 +469,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -638,12 +498,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
   
@@ -651,14 +505,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -679,8 +525,6 @@
     
   
     
-  
-    
       <option value="/docs/core-java.html">Using Core Java</option>
     
   
@@ -704,18 +548,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -727,8 +559,6 @@
     
   
     
-  
-    
       <option value="/docs/core-cpp.html">Using Core C++</option>
     
   
@@ -754,18 +584,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -788,8 +606,6 @@
     
   
     
-  
-    
       <option value="/docs/cpp-tools.html">C++ Tools</option>
     
   
@@ -811,18 +627,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -848,12 +652,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/java-tools.html">Java Tools</option>
     
   
@@ -865,384 +663,19 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
     
-    <optgroup label="Format Specification">
-      
+  </select>
+</div>
 
 
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/file-tail.html">File Tail</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/compression.html">Compression</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/stripes.html">Stripes</option>
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/encodings.html">Column Encodings</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-index.html">Indexes</option>
-    
-  
-    
-  
-    
-  
-    
-  
-
-
-    </optgroup>
-    
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>Using Core C++</h1>
-          <p>The C++ Core ORC API reads and writes ORC files into its own
-orc::ColumnVectorBatch vectorized classes.</p>
+      <div class="unit four-fifths">
+        <article>
+          <h1>Using Core C++</h1>
+          <p>The C++ Core ORC API reads and writes ORC files into its own
+orc::ColumnVectorBatch vectorized classes.</p>
 
 <h2 id="vectorized-row-batch">Vectorized Row Batch</h2>
 
@@ -1345,576 +778,280 @@ value is null.</p>
       <td>UnionVectorBatch</td>
     </tr>
     <tr>
-      <td>varchar</td>
-      <td>StringVectorBatch</td>
-    </tr>
-  </tbody>
-</table>
-
-<p>LongVectorBatch handles all of the integer types (boolean, bigint,
-date, int, smallint, and tinyint). The data is represented as a
-buffer of int64_t where each value is sign-extended as necessary.</p>
-
-<pre><code class="language-cpp">  struct LongVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;int64_t&gt; data;
-    ...
-  };
-</code></pre>
-
-<p>TimestampVectorBatch handles timestamp values. The data is
-represented as two buffers of int64_t for seconds and nanoseconds
-respectively. Note that we always assume data is in GMT timezone;
-therefore it is user’s responsibility to convert wall clock time
-from local timezone to GMT.</p>
-
-<pre><code class="language-cpp">  struct TimestampVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;int64_t&gt; data;
-    DataBuffer&lt;int64_t&gt; nanoseconds;
-    ...
-  };
-</code></pre>
-
-<p>DoubleVectorBatch handles all of the floating point types
-(double, and float). The data is represented as a buffer of doubles.</p>
-
-<pre><code class="language-cpp">  struct DoubleVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;double&gt; data;
-    ...
-  };
-</code></pre>
-
-<p>Decimal64VectorBatch handles decimal columns with precision no
-greater than 18. Decimal128VectorBatch handles the others. The data
-is represented as a buffer of int64_t and orc::Int128 respectively.</p>
-
-<pre><code class="language-cpp">  struct Decimal64VectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;int64_t&gt; values;
-    ...
-  };
-
-  struct Decimal128VectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;Int128&gt; values;
-    ...
-  };
-</code></pre>
-
-<p>StringVectorBatch handles all of the binary types (binary,
-char, string, and varchar). The data is represented as a char* buffer,
-and a length buffer.</p>
-
-<pre><code class="language-cpp">  struct StringVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;char*&gt; data;
-    DataBuffer&lt;int64_t&gt; length;
-    ...
-  };
-</code></pre>
-
-<p>StructVectorBatch handles the struct columns and represents
-the data as a buffer of <code>ColumnVectorBatch</code>.</p>
-
-<pre><code class="language-cpp">  struct StructVectorBatch: public ColumnVectorBatch {
-    std::vector&lt;ColumnVectorBatch*&gt; fields;
-    ...
-  };
-</code></pre>
+      <td>varchar</td>
+      <td>StringVectorBatch</td>
+    </tr>
+  </tbody>
+</table>
 
-<p>UnionVectorBatch handles the union columns. It uses <code>tags</code>
-to indicate which subtype has the value and <code>offsets</code> indicates
-the offset in child batch of that subtype. A individual
-<code>ColumnVectorBatch</code> is used for each subtype.</p>
+<p>LongVectorBatch handles all of the integer types (boolean, bigint,
+date, int, smallint, and tinyint). The data is represented as a
+buffer of int64_t where each value is sign-extended as necessary.</p>
 
-<pre><code class="language-cpp">  struct UnionVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;unsigned char&gt; tags;
-    DataBuffer&lt;uint64_t&gt; offsets;
-    std::vector&lt;ColumnVectorBatch*&gt; children;
+<pre><code class="language-cpp">  struct LongVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;int64_t&gt; data;
     ...
   };
 </code></pre>
 
-<p>ListVectorBatch handles the array columns and represents
-the data as a buffer of integers for the offsets and a
-<code>ColumnVectorBatch</code> for the children values.</p>
+<p>TimestampVectorBatch handles timestamp values. The data is
+represented as two buffers of int64_t for seconds and nanoseconds
+respectively. Note that we always assume data is in GMT timezone;
+therefore it is the user’s responsibility to convert wall clock time
+from local timezone to GMT.</p>
 
-<pre><code class="language-cpp">  struct ListVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;int64_t&gt; offsets;
-    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; elements;
+<pre><code class="language-cpp">  struct TimestampVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;int64_t&gt; data;
+    DataBuffer&lt;int64_t&gt; nanoseconds;
     ...
   };
 </code></pre>
 
-<p>MapVectorBatch handles the map columns and represents the data
-as two arrays of integers for the offsets and two <code>ColumnVectorBatch</code>s
-for the keys and values.</p>
+<p>DoubleVectorBatch handles all of the floating point types
+(double and float). The data is represented as a buffer of doubles.</p>
 
-<pre><code class="language-cpp">  struct MapVectorBatch: public ColumnVectorBatch {
-    DataBuffer&lt;int64_t&gt; offsets;
-    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; keys;
-    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; elements;
+<pre><code class="language-cpp">  struct DoubleVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;double&gt; data;
     ...
   };
 </code></pre>
 
-<h2 id="writing-orc-files">Writing ORC Files</h2>
-
-<p>To write an ORC file, you need to include <code>OrcFile.hh</code> and define
-the schema; then use <code>orc::OutputStream</code> and <code>orc::WriterOptions</code>
-to create a <code>orc::Writer</code> with the desired filename. This example
-sets the required schema parameter, but there are many other
-options to control the ORC writer.</p>
-
-<pre><code class="language-cpp">ORC_UNIQUE_PTR&lt;OutputStream&gt; outStream =
-  writeLocalFile("my-file.orc");
-ORC_UNIQUE_PTR&lt;Type&gt; schema(
-  Type::buildTypeFromString("struct&lt;x:int,y:int&gt;"));
-WriterOptions options;
-ORC_UNIQUE_PTR&lt;Writer&gt; writer =
-  createWriter(*schema, outStream.get(), options);
-</code></pre>
-
-<p>Now you need to create a row batch, set the data, and write it to the file
-as the batch fills up. When the file is done, close the <code>Writer</code>.</p>
-
-<pre><code class="language-cpp">uint64_t batchSize = 1024, rowCount = 10000;
-ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; batch =
-  writer-&gt;createRowBatch(batchSize);
-StructVectorBatch *root =
-  dynamic_cast&lt;StructVectorBatch *&gt;(batch.get());
-LongVectorBatch *x =
-  dynamic_cast&lt;LongVectorBatch *&gt;(root-&gt;fields[0]);
-LongVectorBatch *y =
-  dynamic_cast&lt;LongVectorBatch *&gt;(root-&gt;fields[1]);
-
-uint64_t rows = 0;
-for (uint64_t i = 0; i &lt; rowCount; ++i) {
-  x-&gt;data[rows] = i;
-  y-&gt;data[rows] = i * 3;
-  rows++;
-
-  if (rows == batchSize) {
-    root-&gt;numElements = rows;
-    x-&gt;numElements = rows;
-    y-&gt;numElements = rows;
-
-    writer-&gt;add(*batch);
-    rows = 0;
-  }
-}
-
-if (rows != 0) {
-  root-&gt;numElements = rows;
-  x-&gt;numElements = rows;
-  y-&gt;numElements = rows;
-
-  writer-&gt;add(*batch);
-  rows = 0;
-}
-
-writer-&gt;close();
-</code></pre>
-
-<h2 id="reading-orc-files">Reading ORC Files</h2>
-
-<p>To read ORC files, include <code>OrcFile.hh</code> file to create a <code>orc::Reader</code>
-that contains the metadata about the file. There are a few options to
-the <code>orc::Reader</code>, but far fewer than the writer and none of them are
-required. The reader has methods for getting the number of rows,
-schema, compression, etc. from the file.</p>
-
-<pre><code class="language-cpp">ORC_UNIQUE_PTR&lt;InputStream&gt; inStream =
-  readLocalFile("my-file.orc");
-ReaderOptions options;
-ORC_UNIQUE_PTR&lt;Reader&gt; reader =
-  createReader(inStream, options);
-</code></pre>
-
-<p>To get the data, create a <code>orc::RowReader</code> object. By default,
-the RowReader reads all rows and all columns, but there are
-options to control the data that is read.</p>
-
-<pre><code class="language-cpp">RowReaderOptions rowReaderOptions;
-ORC_UNIQUE_PTR&lt;RowReader&gt; rowReader =
-  reader-&gt;createRowReader(rowReaderOptions);
-ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; batch =
-  rowReader-&gt;createRowBatch(1024);
-</code></pre>
-
-<p>With a <code>orc::RowReader</code> the user can ask for the next batch until there
-are no more left. The reader will stop the batch at certain boundaries,
-so the returned batch may not be full, but it will always contain some rows.</p>
-
-<pre><code class="language-cpp">while (rowReader-&gt;next(*batch)) {
-  for (uint64_t r = 0; r &lt; batch-&gt;numElements; ++r) {
-    ... process row r from batch
-  }
-}
-</code></pre>
-
-          
-
-
-
-
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            
-            
-            <a href="/docs/core-java.html" class="prev">Back</a>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/cpp-tools.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
-    
-
-        </article>
-      </div>
-
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
-    
-
-<ul>
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/index.html">Background</a></li>
-      
-
-
-  
+<p>Decimal64VectorBatch handles decimal columns with precision no
+greater than 18. Decimal128VectorBatch handles the others. The data
+is represented as a buffer of int64_t and orc::Int128 respectively.</p>
 
-  
-    
-  
+<pre><code class="language-cpp">  struct Decimal64VectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;int64_t&gt; values;
+    ...
+  };
 
-  
-    
-  
-    
-      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
-      
+  struct Decimal128VectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;Int128&gt; values;
+    ...
+  };
+</code></pre>
 
+<p>StringVectorBatch handles all of the binary types (binary,
+char, string, and varchar). The data is represented as a char* buffer,
+and a length buffer.</p>
 
-  
+<pre><code class="language-cpp">  struct StringVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;char*&gt; data;
+    DataBuffer&lt;int64_t&gt; length;
+    ...
+  };
+</code></pre>
 
-  
-    
-  
+<p>StructVectorBatch handles the struct columns and represents
+the data as a buffer of <code>ColumnVectorBatch</code>.</p>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
+<pre><code class="language-cpp">  struct StructVectorBatch: public ColumnVectorBatch {
+    std::vector&lt;ColumnVectorBatch*&gt; fields;
+    ...
+  };
+</code></pre>
 
+<p>UnionVectorBatch handles the union columns. It uses <code>tags</code>
+to indicate which subtype has the value and <code>offsets</code> indicates
+the offset in the child batch of that subtype. An individual
+<code>ColumnVectorBatch</code> is used for each subtype.</p>
 
-  
+<pre><code class="language-cpp">  struct UnionVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;unsigned char&gt; tags;
+    DataBuffer&lt;uint64_t&gt; offsets;
+    std::vector&lt;ColumnVectorBatch*&gt; children;
+    ...
+  };
+</code></pre>
 
-  
-    
-  
+<p>ListVectorBatch handles the array columns and represents
+the data as a buffer of integers for the offsets and a
+<code>ColumnVectorBatch</code> for the children values.</p>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
-      
+<pre><code class="language-cpp">  struct ListVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;int64_t&gt; offsets;
+    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; elements;
+    ...
+  };
+</code></pre>
 
+<p>MapVectorBatch handles the map columns and represents the data
+as a buffer of integers for the offsets and two <code>ColumnVectorBatch</code>s
+for the keys and values.</p>
 
-  
+<pre><code class="language-cpp">  struct MapVectorBatch: public ColumnVectorBatch {
+    DataBuffer&lt;int64_t&gt; offsets;
+    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; keys;
+    ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; elements;
+    ...
+  };
+</code></pre>
 
-  
-    
-  
+<h2 id="writing-orc-files">Writing ORC Files</h2>
 
-  
-    
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
+<p>To write an ORC file, you need to include <code>OrcFile.hh</code> and define
+the schema; then use <code>orc::OutputStream</code> and <code>orc::WriterOptions</code>
+to create an <code>orc::Writer</code> with the desired filename. This example
+sets the required schema parameter, but there are many other
+options to control the ORC writer.</p>
 
+<pre><code class="language-cpp">ORC_UNIQUE_PTR&lt;OutputStream&gt; outStream =
+  writeLocalFile("my-file.orc");
+ORC_UNIQUE_PTR&lt;Type&gt; schema(
+  Type::buildTypeFromString("struct&lt;x:int,y:int&gt;"));
+WriterOptions options;
+ORC_UNIQUE_PTR&lt;Writer&gt; writer =
+  createWriter(*schema, outStream.get(), options);
+</code></pre>
 
-</ul>
+<p>Now you need to create a row batch, set the data, and write it to the file
+as the batch fills up. When the file is done, close the <code>Writer</code>.</p>
 
-    
-    <h4>Installing</h4>
-    
+<pre><code class="language-cpp">uint64_t batchSize = 1024, rowCount = 10000;
+ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; batch =
+  writer-&gt;createRowBatch(batchSize);
+StructVectorBatch *root =
+  dynamic_cast&lt;StructVectorBatch *&gt;(batch.get());
+LongVectorBatch *x =
+  dynamic_cast&lt;LongVectorBatch *&gt;(root-&gt;fields[0]);
+LongVectorBatch *y =
+  dynamic_cast&lt;LongVectorBatch *&gt;(root-&gt;fields[1]);
 
-<ul>
+uint64_t rows = 0;
+for (uint64_t i = 0; i &lt; rowCount; ++i) {
+  x-&gt;data[rows] = i;
+  y-&gt;data[rows] = i * 3;
+  rows++;
 
-  
+  if (rows == batchSize) {
+    root-&gt;numElements = rows;
+    x-&gt;numElements = rows;
+    y-&gt;numElements = rows;
 
-  
-    
-  
+    writer-&gt;add(*batch);
+    rows = 0;
+  }
+}
 
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/building.html">Building ORC</a></li>
-      
+if (rows != 0) {
+  root-&gt;numElements = rows;
+  x-&gt;numElements = rows;
+  y-&gt;numElements = rows;
 
+  writer-&gt;add(*batch);
+  rows = 0;
+}
 
-  
+writer-&gt;close();
+</code></pre>
 
-  
-    
-  
+<h2 id="reading-orc-files">Reading ORC Files</h2>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/releases.html">Releases</a></li>
-      
+<p>To read ORC files, include the <code>OrcFile.hh</code> file to create an <code>orc::Reader</code>
+that contains the metadata about the file. There are a few options to
+the <code>orc::Reader</code>, but far fewer than the writer and none of them are
+required. The reader has methods for getting the number of rows,
+schema, compression, etc. from the file.</p>
 
+<pre><code class="language-cpp">ORC_UNIQUE_PTR&lt;InputStream&gt; inStream =
+  readLocalFile("my-file.orc");
+ReaderOptions options;
+ORC_UNIQUE_PTR&lt;Reader&gt; reader =
+  createReader(inStream, options);
+</code></pre>
+
+<p>To get the data, create an <code>orc::RowReader</code> object. By default,
+the RowReader reads all rows and all columns, but there are
+options to control the data that is read.</p>
+
+<pre><code class="language-cpp">RowReaderOptions rowReaderOptions;
+ORC_UNIQUE_PTR&lt;RowReader&gt; rowReader =
+  reader-&gt;createRowReader(rowReaderOptions);
+ORC_UNIQUE_PTR&lt;ColumnVectorBatch&gt; batch =
+  rowReader-&gt;createRowBatch(1024);
+</code></pre>
+
+<p>With an <code>orc::RowReader</code> the user can ask for the next batch until there
+are no more left. The reader will stop the batch at certain boundaries,
+so the returned batch may not be full, but it will always contain some rows.</p>
+
+<pre><code class="language-cpp">while (rowReader-&gt;next(*batch)) {
+  for (uint64_t r = 0; r &lt; batch-&gt;numElements; ++r) {
+    ... process row r from batch
+  }
+}
+</code></pre>
+
+          
 
-</ul>
 
-    
-    <h4>Using in Hive</h4>
-    
 
-<ul>
 
-  
 
   
-    
   
 
   
-    
-  
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
-      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
-      
-
 
   
+  
 
   
-    
   
 
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/core-java.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/cpp-tools.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
     
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
-      
 
+        </article>
+      </div>
 
-</ul>
-
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
     
-    <h4>Using in MapReduce</h4>
+    <h4>Overview</h4>
     
 
 <ul>
@@ -1943,19 +1080,21 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
   
-    
-  
-    
+
   
     
   
-    
+
   
     
   
     
-      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
       
 
 
@@ -1995,20 +1134,10 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      <li class=""><a href="/docs/types.html">Types</a></li>
       
 
 
-</ul>
-
-    
-    <h4>Using ORC Core</h4>
-    
-
-<ul>
-
   
 
   
@@ -2027,34 +1156,34 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
-      
-
-
-  
-
   
     
   
-
-  
     
   
     
   
     
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
   
     
   
+
+  
     
-      <li class="current"><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
       
 
 
 </ul>
 
     
-    <h4>Tools</h4>
+    <h4>Installing</h4>
     
 
 <ul>
@@ -2071,15 +1200,7 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
       
 
 
@@ -2117,14 +1238,14 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
       
 
 
 </ul>
 
     
-    <h4>Format Specification</h4>
+    <h4>Using in Hive</h4>
     
 
 <ul>
@@ -2151,31 +1272,7 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
       
 
 
@@ -2199,31 +1296,17 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-  
-    
-      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
       
 
 
-  
-
-  
-    
-  
+</ul>
 
-  
-    
-  
     
-  
-    
-  
+    <h4>Using in MapReduce</h4>
     
-      <li class=""><a href="/docs/compression.html">Compression</a></li>
-      
 
+<ul>
 
   
 
@@ -2255,19 +1338,7 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
       
 
 
@@ -2303,13 +1374,25 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Using ORC Core</h4>
     
+
+<ul>
+
+  
+
   
     
   
+
+  
     
   
     
@@ -2319,7 +1402,7 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
       
 
 
@@ -2337,17 +1420,17 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-  
-    
-  
-    
-  
+      <li class="current"><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Tools</h4>
     
-      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
-      
 
+<ul>
 
   
 
@@ -2367,11 +1450,17 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
+      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      
+
+
   
-    
+
   
     
   
+
+  
     
   
     
@@ -2393,7 +1482,7 @@ so the returned batch may not be full, but it will always contain some rows.</p>
     
   
     
-      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
       
 
 

http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/core-java.html
----------------------------------------------------------------------
diff --git a/docs/core-java.html b/docs/core-java.html
index 196bf0d..ca4e99b 100644
--- a/docs/core-java.html
+++ b/docs/core-java.html
@@ -109,12 +109,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/index.html">Background</option>
     
   
@@ -130,14 +124,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -174,20 +160,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -221,20 +193,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
       <option value="/docs/types.html">Types</option>
     
   
@@ -261,12 +219,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/indexes.html">Indexes</option>
     
   
@@ -280,14 +232,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -324,20 +268,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -381,20 +311,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -426,25 +342,11 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/releases.html">Releases</option>
     
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -471,12 +373,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-ddl.html">Hive DDL</option>
     
   
@@ -494,14 +390,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -519,12 +407,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-config.html">Hive Configuration</option>
     
   
@@ -544,14 +426,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -586,12 +460,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapred.html">Using in MapRed</option>
     
   
@@ -601,14 +469,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -638,12 +498,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
   
@@ -651,14 +505,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -679,8 +525,6 @@
     
   
     
-  
-    
       <option value="/docs/core-java.html">Using Core Java</option>
     
   
@@ -704,18 +548,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -727,8 +559,6 @@
     
   
     
-  
-    
       <option value="/docs/core-cpp.html">Using Core C++</option>
     
   
@@ -754,18 +584,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -788,8 +606,6 @@
     
   
     
-  
-    
       <option value="/docs/cpp-tools.html">C++ Tools</option>
     
   
@@ -811,18 +627,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -848,12 +652,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/java-tools.html">Java Tools</option>
     
   
@@ -865,385 +663,20 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
     
-    <optgroup label="Format Specification">
-      
+  </select>
+</div>
 
 
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/file-tail.html">File Tail</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/compression.html">Compression</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/stripes.html">Stripes</option>
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/encodings.html">Column Encodings</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-index.html">Indexes</option>
-    
-  
-    
-  
-    
-  
-    
-  
-
-
-    </optgroup>
-    
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>Using Core Java</h1>
-          <p>The Core ORC API reads and writes ORC files into Hive’s storage-api
-vectorized classes. Both Hive and MapReduce use the Core API to actually
-read and write the data.</p>
+      <div class="unit four-fifths">
+        <article>
+          <h1>Using Core Java</h1>
+          <p>The Core ORC API reads and writes ORC files into Hive’s storage-api
+vectorized classes. Both Hive and MapReduce use the Core API to actually
+read and write the data.</p>
 
 <h2 id="vectorized-row-batch">Vectorized Row Batch</h2>
 
@@ -1646,289 +1079,63 @@ rows.close();
   
 
   
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            
-            
-            <a href="/docs/mapreduce.html" class="prev">Back</a>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/core-cpp.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
-    
-
-        </article>
-      </div>
-
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
-    
-
-<ul>
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/index.html">Background</a></li>
-      
-
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
-      
-
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
-
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
-      
-
-
-  
-
-  
-    
-  
-
-  
-    
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
-
-
-</ul>
-
-    
-    <h4>Installing</h4>
-    
-
-<ul>
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/building.html">Building ORC</a></li>
-      
-
-
-  
+  
 
   
-    
   
 
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/mapreduce.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/core-cpp.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
     
-      <li class=""><a href="/docs/releases.html">Releases</a></li>
-      
 
+        </article>
+      </div>
 
-</ul>
-
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
     
-    <h4>Using in Hive</h4>
+    <h4>Overview</h4>
     
 
 <ul>
@@ -1957,11 +1164,7 @@ rows.close();
     
   
     
-  
-    
-  
-    
-      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      <li class=""><a href="/docs/index.html">Background</a></li>
       
 
 
@@ -1975,34 +1178,10 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
       
 
 
-</ul>
-
-    
-    <h4>Using in MapReduce</h4>
-    
-
-<ul>
-
   
 
   
@@ -2039,7 +1218,7 @@ rows.close();
     
   
     
-      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      <li class=""><a href="/docs/types.html">Types</a></li>
       
 
 
@@ -2069,49 +1248,7 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
-      
-
-
-</ul>
-
-    
-    <h4>Using ORC Core</h4>
-    
-
-<ul>
-
-  
-
-  
-    
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class="current"><a href="/docs/core-java.html">Using Core Java</a></li>
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
       
 
 
@@ -2123,22 +1260,14 @@ rows.close();
 
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
       
 
 
 </ul>
 
     
-    <h4>Tools</h4>
+    <h4>Installing</h4>
     
 
 <ul>
@@ -2155,15 +1284,7 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
       
 
 
@@ -2201,14 +1322,14 @@ rows.close();
     
   
     
-      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
       
 
 
 </ul>
 
     
-    <h4>Format Specification</h4>
+    <h4>Using in Hive</h4>
     
 
 <ul>
@@ -2235,31 +1356,7 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
       
 
 
@@ -2283,31 +1380,17 @@ rows.close();
     
   
     
-  
-    
-  
-    
-      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
       
 
 
-  
-
-  
-    
-  
+</ul>
 
-  
-    
-  
     
-  
-    
-  
+    <h4>Using in MapReduce</h4>
     
-      <li class=""><a href="/docs/compression.html">Compression</a></li>
-      
 
+<ul>
 
   
 
@@ -2339,19 +1422,7 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
       
 
 
@@ -2387,13 +1458,25 @@ rows.close();
     
   
     
-  
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Using ORC Core</h4>
     
+
+<ul>
+
+  
+
   
     
   
+
+  
     
   
     
@@ -2403,7 +1486,7 @@ rows.close();
     
   
     
-      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      <li class="current"><a href="/docs/core-java.html">Using Core Java</a></li>
       
 
 
@@ -2421,17 +1504,17 @@ rows.close();
     
   
     
-  
-    
-  
-    
-  
+      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Tools</h4>
     
-      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
-      
 
+<ul>
 
   
 
@@ -2451,11 +1534,17 @@ rows.close();
     
   
     
+      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      
+
+
   
-    
+
   
     
   
+
+  
     
   
     
@@ -2477,7 +1566,7 @@ rows.close();
     
   
     
-      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
       
 
 

http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/cpp-tools.html
----------------------------------------------------------------------
diff --git a/docs/cpp-tools.html b/docs/cpp-tools.html
index 171dc0d..abe6e2e 100644
--- a/docs/cpp-tools.html
+++ b/docs/cpp-tools.html
@@ -109,12 +109,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/index.html">Background</option>
     
   
@@ -130,14 +124,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -174,20 +160,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -221,20 +193,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
       <option value="/docs/types.html">Types</option>
     
   
@@ -261,12 +219,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/indexes.html">Indexes</option>
     
   
@@ -280,14 +232,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -324,20 +268,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -381,20 +311,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -426,25 +342,11 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/releases.html">Releases</option>
     
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -471,12 +373,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-ddl.html">Hive DDL</option>
     
   
@@ -494,14 +390,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -519,12 +407,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/hive-config.html">Hive Configuration</option>
     
   
@@ -544,14 +426,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -586,12 +460,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapred.html">Using in MapRed</option>
     
   
@@ -601,14 +469,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -638,12 +498,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
   
@@ -651,14 +505,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -679,8 +525,6 @@
     
   
     
-  
-    
       <option value="/docs/core-java.html">Using Core Java</option>
     
   
@@ -704,18 +548,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -727,8 +559,6 @@
     
   
     
-  
-    
       <option value="/docs/core-cpp.html">Using Core C++</option>
     
   
@@ -754,18 +584,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
@@ -788,8 +606,6 @@
     
   
     
-  
-    
       <option value="/docs/cpp-tools.html">C++ Tools</option>
     
   
@@ -811,18 +627,6 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
 
   
 
@@ -848,12 +652,6 @@
     
   
     
-  
-    
-  
-    
-  
-    
       <option value="/docs/java-tools.html">Java Tools</option>
     
   
@@ -865,1004 +663,343 @@
   
     
   
-    
-  
-    
-  
-    
-  
-    
-  
 
 
     </optgroup>
     
-    <optgroup label="Format Specification">
-      
+  </select>
+</div>
 
 
-  
+      <div class="unit four-fifths">
+        <article>
+          <h1>C++ Tools</h1>
+          <h2 id="orc-contents">orc-contents</h2>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/file-tail.html">File Tail</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/compression.html">Compression</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/stripes.html">Stripes</option>
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/encodings.html">Column Encodings</option>
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-
-  
-
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <option value="/docs/spec-index.html">Indexes</option>
-    
-  
-    
-  
-    
-  
-    
-  
-
-
-    </optgroup>
-    
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>C++ Tools</h1>
-          <h2 id="orc-contents">orc-contents</h2>
-
-<p>Displays the contents of the ORC file as a JSON document. With the
-<code>columns</code> argument only the selected columns are printed.</p>
-
-<pre><code class="language-shell">% orc-contents  [--columns=1,2,...] &lt;filename&gt;
-</code></pre>
-
-<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see (without
-the line breaks within each record):</p>
-
-<pre><code class="language-shell">% orc-contents examples/TestOrcFile.test1.orc
-{"boolean1": false, "byte1": 1, "short1": 1024, "int1": 65536, \\
- "long1": 9223372036854775807, "float1": 1, "double1": -15, \\
- "bytes1": [0, 1, 2, 3, 4], "string1": "hi", "middle": \\
-    {"list": [{"int1": 1, "string1": "bye"}, \\
-              {"int1": 2, "string1": "sigh"}]}, \\
- "list": [{"int1": 3, "string1": "good"}, \\
-          {"int1": 4, "string1": "bad"}], \\
- "map": []}
-{"boolean1": true, "byte1": 100, "short1": 2048, "int1": 65536,
- "long1": 9223372036854775807, "float1": 2, "double1": -5, \\
- "bytes1": [], "string1": "bye", \\
- "middle": {"list": [{"int1": 1, "string1": "bye"}, \\
-                     {"int1": 2, "string1": "sigh"}]}, \\
- "list": [{"int1": 100000000, "string1": "cat"}, \\
-          {"int1": -100000, "string1": "in"}, \\
-          {"int1": 1234, "string1": "hat"}], \\
- "map": [{"key": "chani", "value": {"int1": 5, "string1": "chani"}}, \\
-         {"key": "mauddib", \\
-          "value": {"int1": 1, "string1": "mauddib"}}]}
-</code></pre>
-
-<h2 id="orc-metadata">orc-metadata</h2>
-
-<p>Displays the metadata of the ORC file as a JSON document. With the
-<code>verbose</code> option additional information about the layout of the file
-is also printed.</p>
-
-<p>For diagnosing problems, it is useful to use the ‘–raw’ option that
-prints the protocol buffers from the ORC file directly rather than
-interpreting them.</p>
-
-<pre><code class="language-shell">% orc-metadata [-v] [--raw] &lt;filename&gt;
-</code></pre>
-
-<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p>
-
-<pre><code class="language-shell">% orc-metadata examples/TestOrcFile.test1.orc
-{ "name": "../examples/TestOrcFile.test1.orc",
-  "type": "struct&lt;boolean1:boolean,byte1:tinyint,short1:smallint,
-int1:int,long1:bigint,float1:float,double1:double,bytes1:binary,
-string1:string,middle:struct&lt;list:array&lt;struct&lt;int1:int,string1:
-string&gt;&gt;&gt;,list:array&lt;struct&lt;int1:int,string1:string&gt;&gt;,map:map&lt;
-string,struct&lt;int1:int,string1:string&gt;&gt;&gt;",
-  "rows": 2,
-  "stripe count": 1,
-  "format": "0.12", "writer version": "HIVE-8732",
-  "compression": "zlib", "compression block": 10000,
-  "file length": 1711,
-  "content": 1015, "stripe stats": 250, "footer": 421, "postscript": 24,
-  "row index stride": 10000,
-  "user metadata": {
-  },
-  "stripes": [
-    { "stripe": 0, "rows": 2,
-      "offset": 3, "length": 1012,
-      "index": 570, "data": 243, "footer": 199
-    }
-  ]
-}
-</code></pre>
-
-<h2 id="csv-import">csv-import</h2>
-
-<p>Imports CSV file into an Orc file using the specified schema.
-Compound types are not yet supported. <code>delimiter</code> option indicates
-the delimiter in the input CSV file and by default is <code>,</code>. <code>stripe</code>
-option means the stripe size and set to 128MB by default. <code>block</code>
-option is compression block size which is 64KB by default. <code>batch</code>
-option is by default 1024 rows for one batch.</p>
-
-<pre><code class="language-shell">% csv-import [--delimiter=&lt;character&gt;] [--stripe=&lt;size&gt;]
-             [--block=&lt;size&gt;] [--batch=&lt;size&gt;]
-             &lt;schema&gt; &lt;inputCSVFile&gt; &lt;outputORCFile&gt;
-</code></pre>
-
-<p>If you run it on the example file TestCSVFileImport.test10rows.csv,
-you’ll see:</p>
-
-<pre><code class="language-shell">% csv-import "struct&lt;a:bigint,b:string,c:double&gt;"
-             examples/TestCSVFileImport.test10rows.csv /tmp/test.orc
-[2018-04-11 11:12:16] Start importing Orc file...
-[2018-04-11 11:12:16] Finish importing Orc file.
-[2018-04-11 11:12:16] Total writer elasped time: 0.001352s.
-[2018-04-11 11:12:16] Total writer CPU time: 0.001339s.
-</code></pre>
-
-<h2 id="orc-scan">orc-scan</h2>
-
-<p>Scans and displays the row count of the ORC file. With the <code>batch</code> option
-to set the batch size which is 1024 rows by default. It is useful to check
-if the ORC file is damaged.</p>
-
-<pre><code class="language-shell">% orc-scan [--batch=&lt;size&gt;] &lt;filename&gt;
-</code></pre>
-
-<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p>
-
-<pre><code class="language-shell">% orc-scan examples/TestOrcFile.test1.orc
-Rows: 2
-Batches: 1
-</code></pre>
-
-<h2 id="orc-statistics">orc-statistics</h2>
-
-<p>Displays the file-level and stripe-level column statistics of the ORC file.
-With the <code>withIndex</code> option to include column statistics in each row group.</p>
-
-<pre><code class="language-shell">% orc-statistics [--withIndex] &lt;filename&gt;
-</code></pre>
-
-<p>If you run it on the example file TestOrcFile.TestOrcFile.columnProjection.orc
-you’ll see:</p>
-
-<pre><code class="language-shell">% orc-statistics examples/TestOrcFile.columnProjection.orc
-File examples/TestOrcFile.columnProjection.orc has 3 columns
-*** Column 0 ***
-Column has 21000 values and has null value: no
-
-*** Column 1 ***
-Data type: Integer
-Values: 21000
-Has null: no
-Minimum: -2147439072
-Maximum: 2147257982
-Sum: 268482658568
-
-*** Column 2 ***
-Data type: String
-Values: 21000
-Has null: no
-Minimum: 100119c272d7db89
-Maximum: fffe9f6f23b287f3
-Total length: 334559
-
-File examples/TestOrcFile.columnProjection.orc has 5 stripes
-*** Stripe 0 ***
-
---- Column 0 ---
-Column has 5000 values and has null value: no
-
---- Column 1 ---
-Data type: Integer
-Values: 5000
-Has null: no
-Minimum: -2145365268
-Maximum: 2147025027
-Sum: -29841423854
-
---- Column 2 ---
-Data type: String
-Values: 5000
-Has null: no
-Minimum: 1005350489418be2
-Maximum: fffbb8718c92b09f
-Total length: 79644
-
-*** Stripe 1 ***
-
---- Column 0 ---
-Column has 5000 values and has null value: no
-
---- Column 1 ---
-Data type: Integer
-Values: 5000
-Has null: no
-Minimum: -2147115959
-Maximum: 2147257982
-Sum: 108604887785
-
---- Column 2 ---
-Data type: String
-Values: 5000
-Has null: no
-Minimum: 100119c272d7db89
-Maximum: fff0ae41d41e6afc
-Total length: 79640
-
-*** Stripe 2 ***
-
---- Column 0 ---
-Column has 5000 values and has null value: no
-
---- Column 1 ---
-Data type: Integer
-Values: 5000
-Has null: no
-Minimum: -2145932387
-Maximum: 2145877119
-Sum: 70064190848
-
---- Column 2 ---
-Data type: String
-Values: 5000
-Has null: no
-Minimum: 10130af874ae036c
-Maximum: fffe9f6f23b287f3
-Total length: 79645
-
-*** Stripe 3 ***
-
---- Column 0 ---
-Column has 5000 values and has null value: no
-
---- Column 1 ---
-Data type: Integer
-Values: 5000
-Has null: no
-Minimum: -2147439072
-Maximum: 2147074354
-Sum: 104681356482
-
---- Column 2 ---
-Data type: String
-Values: 5000
-Has null: no
-Minimum: 102547d48ed06518
-Maximum: fffa47c57dc7b69a
-Total length: 79689
-
-*** Stripe 4 ***
-
---- Column 0 ---
-Column has 1000 values and has null value: no
-
---- Column 1 ---
-Data type: Integer
-Values: 1000
-Has null: no
-Minimum: -2141222223
-Maximum: 2145816096
-Sum: 14973647307
+<p>Displays the contents of the ORC file as a JSON document. With the
+<code>columns</code> argument only the selected columns are printed.</p>
 
---- Column 2 ---
-Data type: String
-Values: 1000
-Has null: no
-Minimum: 1059d81c9025a217
-Maximum: ffc17f0e35e1a6c0
-Total length: 15941
+<pre><code class="language-shell">% orc-contents  [--columns=1,2,...] &lt;filename&gt;
 </code></pre>
 
-          
-
-
-
-
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
-
-  
-  
+<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see (without
+the line breaks within each record):</p>
 
-  
-  
+<pre><code class="language-shell">% orc-contents examples/TestOrcFile.test1.orc
+{"boolean1": false, "byte1": 1, "short1": 1024, "int1": 65536, \\
+ "long1": 9223372036854775807, "float1": 1, "double1": -15, \\
+ "bytes1": [0, 1, 2, 3, 4], "string1": "hi", "middle": \\
+    {"list": [{"int1": 1, "string1": "bye"}, \\
+              {"int1": 2, "string1": "sigh"}]}, \\
+ "list": [{"int1": 3, "string1": "good"}, \\
+          {"int1": 4, "string1": "bad"}], \\
+ "map": []}
+{"boolean1": true, "byte1": 100, "short1": 2048, "int1": 65536,
+ "long1": 9223372036854775807, "float1": 2, "double1": -5, \\
+ "bytes1": [], "string1": "bye", \\
+ "middle": {"list": [{"int1": 1, "string1": "bye"}, \\
+                     {"int1": 2, "string1": "sigh"}]}, \\
+ "list": [{"int1": 100000000, "string1": "cat"}, \\
+          {"int1": -100000, "string1": "in"}, \\
+          {"int1": 1234, "string1": "hat"}], \\
+ "map": [{"key": "chani", "value": {"int1": 5, "string1": "chani"}}, \\
+         {"key": "mauddib", \\
+          "value": {"int1": 1, "string1": "mauddib"}}]}
+</code></pre>
 
-  
-  
+<h2 id="orc-metadata">orc-metadata</h2>
 
-  
-  
+<p>Displays the metadata of the ORC file as a JSON document. With the
+<code>verbose</code> option additional information about the layout of the file
+is also printed.</p>
 
-  
-  
+<p>For diagnosing problems, it is useful to use the ‘–raw’ option that
+prints the protocol buffers from the ORC file directly rather than
+interpreting them.</p>
 
-  
-  
+<pre><code class="language-shell">% orc-metadata [-v] [--raw] &lt;filename&gt;
+</code></pre>
 
-  
-  
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            
-            
-            <a href="/docs/core-cpp.html" class="prev">Back</a>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/java-tools.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
-    
+<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p>
 
-        </article>
-      </div>
+<pre><code class="language-shell">% orc-metadata examples/TestOrcFile.test1.orc
+{ "name": "../examples/TestOrcFile.test1.orc",
+  "type": "struct&lt;boolean1:boolean,byte1:tinyint,short1:smallint,
+int1:int,long1:bigint,float1:float,double1:double,bytes1:binary,
+string1:string,middle:struct&lt;list:array&lt;struct&lt;int1:int,string1:
+string&gt;&gt;&gt;,list:array&lt;struct&lt;int1:int,string1:string&gt;&gt;,map:map&lt;
+string,struct&lt;int1:int,string1:string&gt;&gt;&gt;",
+  "rows": 2,
+  "stripe count": 1,
+  "format": "0.12", "writer version": "HIVE-8732",
+  "compression": "zlib", "compression block": 10000,
+  "file length": 1711,
+  "content": 1015, "stripe stats": 250, "footer": 421, "postscript": 24,
+  "row index stride": 10000,
+  "user metadata": {
+  },
+  "stripes": [
+    { "stripe": 0, "rows": 2,
+      "offset": 3, "length": 1012,
+      "index": 570, "data": 243, "footer": 199
+    }
+  ]
+}
+</code></pre>
 
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
-    
+<h2 id="csv-import">csv-import</h2>
 
-<ul>
+<p>Imports CSV file into an Orc file using the specified schema.
+Compound types are not yet supported. <code>delimiter</code> option indicates
+the delimiter in the input CSV file and by default is <code>,</code>. <code>stripe</code>
+option means the stripe size and set to 128MB by default. <code>block</code>
+option is compression block size which is 64KB by default. <code>batch</code>
+option is by default 1024 rows for one batch.</p>
 
-  
+<pre><code class="language-shell">% csv-import [--delimiter=&lt;character&gt;] [--stripe=&lt;size&gt;]
+             [--block=&lt;size&gt;] [--batch=&lt;size&gt;]
+             &lt;schema&gt; &lt;inputCSVFile&gt; &lt;outputORCFile&gt;
+</code></pre>
 
-  
-    
-  
+<p>If you run it on the example file TestCSVFileImport.test10rows.csv,
+you’ll see:</p>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/index.html">Background</a></li>
-      
+<pre><code class="language-shell">% csv-import "struct&lt;a:bigint,b:string,c:double&gt;"
+             examples/TestCSVFileImport.test10rows.csv /tmp/test.orc
+[2018-04-11 11:12:16] Start importing Orc file...
+[2018-04-11 11:12:16] Finish importing Orc file.
+[2018-04-11 11:12:16] Total writer elasped time: 0.001352s.
+[2018-04-11 11:12:16] Total writer CPU time: 0.001339s.
+</code></pre>
 
+<h2 id="orc-scan">orc-scan</h2>
 
-  
+<p>Scans and displays the row count of the ORC file. With the <code>batch</code> option
+to set the batch size which is 1024 rows by default. It is useful to check
+if the ORC file is damaged.</p>
 
-  
-    
-  
+<pre><code class="language-shell">% orc-scan [--batch=&lt;size&gt;] &lt;filename&gt;
+</code></pre>
 
-  
-    
-  
-    
-      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
-      
+<p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p>
 
+<pre><code class="language-shell">% orc-scan examples/TestOrcFile.test1.orc
+Rows: 2
+Batches: 1
+</code></pre>
 
-  
+<h2 id="orc-statistics">orc-statistics</h2>
 
-  
-    
-  
+<p>Displays the file-level and stripe-level column statistics of the ORC file.
+With the <code>withIndex</code> option to include column statistics in each row group.</p>
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
+<pre><code class="language-shell">% orc-statistics [--withIndex] &lt;filename&gt;
+</code></pre>
 
+<p>If you run it on the example file TestOrcFile.TestOrcFile.columnProjection.orc
+you’ll see:</p>
 
-  
+<pre><code class="language-shell">% orc-statistics examples/TestOrcFile.columnProjection.orc
+File examples/TestOrcFile.columnProjection.orc has 3 columns
+*** Column 0 ***
+Column has 21000 values and has null value: no
 
-  
-    
-  
+*** Column 1 ***
+Data type: Integer
+Values: 21000
+Has null: no
+Minimum: -2147439072
+Maximum: 2147257982
+Sum: 268482658568
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
-      
+*** Column 2 ***
+Data type: String
+Values: 21000
+Has null: no
+Minimum: 100119c272d7db89
+Maximum: fffe9f6f23b287f3
+Total length: 334559
 
+File examples/TestOrcFile.columnProjection.orc has 5 stripes
+*** Stripe 0 ***
 
-  
+--- Column 0 ---
+Column has 5000 values and has null value: no
 
-  
-    
-  
+--- Column 1 ---
+Data type: Integer
+Values: 5000
+Has null: no
+Minimum: -2145365268
+Maximum: 2147025027
+Sum: -29841423854
 
-  
-    
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
+--- Column 2 ---
+Data type: String
+Values: 5000
+Has null: no
+Minimum: 1005350489418be2
+Maximum: fffbb8718c92b09f
+Total length: 79644
 
+*** Stripe 1 ***
 
-</ul>
+--- Column 0 ---
+Column has 5000 values and has null value: no
 
-    
-    <h4>Installing</h4>
-    
+--- Column 1 ---
+Data type: Integer
+Values: 5000
+Has null: no
+Minimum: -2147115959
+Maximum: 2147257982
+Sum: 108604887785
 
-<ul>
+--- Column 2 ---
+Data type: String
+Values: 5000
+Has null: no
+Minimum: 100119c272d7db89
+Maximum: fff0ae41d41e6afc
+Total length: 79640
 
-  
+*** Stripe 2 ***
 
-  
-    
-  
+--- Column 0 ---
+Column has 5000 values and has null value: no
 
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/building.html">Building ORC</a></li>
-      
+--- Column 1 ---
+Data type: Integer
+Values: 5000
+Has null: no
+Minimum: -2145932387
+Maximum: 2145877119
+Sum: 70064190848
 
+--- Column 2 ---
+Data type: String
+Values: 5000
+Has null: no
+Minimum: 10130af874ae036c
+Maximum: fffe9f6f23b287f3
+Total length: 79645
 
-  
+*** Stripe 3 ***
 
-  
-    
-  
+--- Column 0 ---
+Column has 5000 values and has null value: no
 
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/releases.html">Releases</a></li>
-      
+--- Column 1 ---
+Data type: Integer
+Values: 5000
+Has null: no
+Minimum: -2147439072
+Maximum: 2147074354
+Sum: 104681356482
 
+--- Column 2 ---
+Data type: String
+Values: 5000
+Has null: no
+Minimum: 102547d48ed06518
+Maximum: fffa47c57dc7b69a
+Total length: 79689
+
+*** Stripe 4 ***
+
+--- Column 0 ---
+Column has 1000 values and has null value: no
+
+--- Column 1 ---
+Data type: Integer
+Values: 1000
+Has null: no
+Minimum: -2141222223
+Maximum: 2145816096
+Sum: 14973647307
+
+--- Column 2 ---
+Data type: String
+Values: 1000
+Has null: no
+Minimum: 1059d81c9025a217
+Maximum: ffc17f0e35e1a6c0
+Total length: 15941
+</code></pre>
+
+          
 
-</ul>
 
-    
-    <h4>Using in Hive</h4>
-    
 
-<ul>
 
-  
 
   
-    
   
 
   
-    
-  
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
-      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
-      
 
+  
+  
 
   
+  
 
   
-    
   
 
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
-    
+
   
-    
   
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/core-cpp.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/java-tools.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
     
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
-      
 
+        </article>
+      </div>
 
-</ul>
-
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
     
-    <h4>Using in MapReduce</h4>
+    <h4>Overview</h4>
     
 
 <ul>
@@ -1891,19 +1028,21 @@ Total length: 15941
     
   
     
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
   
-    
-  
-    
+
   
     
   
-    
+
   
     
   
     
-      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
       
 
 
@@ -1943,20 +1082,10 @@ Total length: 15941
     
   
     
-  
-    
-      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      <li class=""><a href="/docs/types.html">Types</a></li>
       
 
 
-</ul>
-
-    
-    <h4>Using ORC Core</h4>
-    
-
-<ul>
-
   
 
   
@@ -1975,34 +1104,34 @@ Total length: 15941
     
   
     
-      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
-      
-
-
-  
-
   
     
   
-
-  
     
   
     
   
     
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
   
     
   
+
+  
     
-      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
       
 
 
 </ul>
 
     
-    <h4>Tools</h4>
+    <h4>Installing</h4>
     
 
 <ul>
@@ -2019,15 +1148,7 @@ Total length: 15941
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class="current"><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
       
 
 
@@ -2065,14 +1186,14 @@ Total length: 15941
     
   
     
-      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
       
 
 
 </ul>
 
     
-    <h4>Format Specification</h4>
+    <h4>Using in Hive</h4>
     
 
 <ul>
@@ -2099,31 +1220,7 @@ Total length: 15941
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
       
 
 
@@ -2147,31 +1244,17 @@ Total length: 15941
     
   
     
-  
-    
-  
-    
-      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
       
 
 
-  
-
-  
-    
-  
+</ul>
 
-  
-    
-  
     
-  
-    
-  
+    <h4>Using in MapReduce</h4>
     
-      <li class=""><a href="/docs/compression.html">Compression</a></li>
-      
 
+<ul>
 
   
 
@@ -2203,19 +1286,7 @@ Total length: 15941
     
   
     
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-  
-    
-      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
       
 
 
@@ -2251,13 +1322,25 @@ Total length: 15941
     
   
     
-  
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Using ORC Core</h4>
     
+
+<ul>
+
+  
+
   
     
   
+
+  
     
   
     
@@ -2267,7 +1350,7 @@ Total length: 15941
     
   
     
-      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
       
 
 
@@ -2285,17 +1368,17 @@ Total length: 15941
     
   
     
-  
-    
-  
-    
-  
+      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
+      
+
+
+</ul>
+
     
-  
+    <h4>Tools</h4>
     
-      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
-      
 
+<ul>
 
   
 
@@ -2315,11 +1398,17 @@ Total length: 15941
     
   
     
+      <li class="current"><a href="/docs/cpp-tools.html">C++ Tools</a></li>
+      
+
+
   
-    
+
   
     
   
+
+  
     
   
     
@@ -2341,7 +1430,7 @@ Total length: 15941
     
   
     
-      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>