You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/06/16 18:30:00 UTC

[3/4] orc git commit: ORC-194. Split TreeWriters out of WriterImpl.

http://git-wip-us.apache.org/repos/asf/orc/blob/ded204a4/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index cfdddad..a5d65dd 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -20,10 +20,7 @@ package org.apache.orc.impl;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
@@ -34,43 +31,25 @@ import io.airlift.compress.lz4.Lz4Compressor;
 import io.airlift.compress.lz4.Lz4Decompressor;
 import io.airlift.compress.lzo.LzoCompressor;
 import io.airlift.compress.lzo.LzoDecompressor;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.orc.BinaryColumnStatistics;
 import org.apache.orc.ColumnStatistics;
 import org.apache.orc.CompressionCodec;
 import org.apache.orc.CompressionKind;
 import org.apache.orc.MemoryManager;
-import org.apache.orc.OrcConf;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
 import org.apache.orc.OrcUtils;
 import org.apache.orc.PhysicalWriter;
-import org.apache.orc.StringColumnStatistics;
 import org.apache.orc.StripeInformation;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
-import org.apache.orc.util.BloomFilter;
-import org.apache.orc.util.BloomFilterIO;
-import org.apache.orc.util.BloomFilterUtf8;
+import org.apache.orc.impl.writer.TreeWriter;
+import org.apache.orc.impl.writer.WriterContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.io.Text;
 
 import com.google.protobuf.ByteString;
 
@@ -108,7 +87,6 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   private final PhysicalWriter physicalWriter;
   private final OrcFile.WriterVersion writerVersion;
 
-  private int columnCount;
   private long rowCount = 0;
   private long rowsInStripe = 0;
   private long rawDataSize = 0;
@@ -133,7 +111,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   private final boolean[] bloomFilterColumns;
   private final double bloomFilterFpp;
   private final OrcFile.BloomFilterVersion bloomFilterVersion;
-  private boolean writeTimeZone;
+  private final boolean writeTimeZone;
 
   public WriterImpl(FileSystem fs,
                     Path path,
@@ -155,6 +133,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     } else {
       callbackContext = null;
     }
+    writeTimeZone = hasTimestamp(schema);
     this.adjustedStripeSize = opts.getStripeSize();
     this.version = opts.getVersion();
     this.encodingStrategy = opts.getEncodingStrategy();
@@ -181,7 +160,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     this.physicalWriter = opts.getPhysicalWriter() == null ?
         new PhysicalFsWriter(fs, path, opts) : opts.getPhysicalWriter();
     physicalWriter.writeHeader();
-    treeWriter = createTreeWriter(schema, new StreamFactory(), false);
+    treeWriter = TreeWriter.Factory.create(schema, new StreamFactory(), false);
     if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
       throw new IllegalArgumentException("Row stride must be at least " +
           MIN_ROW_INDEX_STRIDE);
@@ -278,2524 +257,142 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     return false;
   }
 
-  private static class RowIndexPositionRecorder implements PositionRecorder {
-    private final OrcProto.RowIndexEntry.Builder builder;
 
-    RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
-      this.builder = builder;
-    }
-
-    @Override
-    public void addPosition(long position) {
-      builder.addPositions(position);
-    }
-  }
-
-  CompressionCodec getCustomizedCodec(OrcProto.Stream.Kind kind) {
-    // TODO: modify may create a new codec here. We want to end() it when the stream is closed,
-    //       but at this point there's no close() for the stream.
-    CompressionCodec result = physicalWriter.getCompressionCodec();
-    if (result != null) {
-      switch (kind) {
-        case BLOOM_FILTER:
-        case DATA:
-        case DICTIONARY_DATA:
-        case BLOOM_FILTER_UTF8:
-          if (compressionStrategy == OrcFile.CompressionStrategy.SPEED) {
-            result = result.modify(EnumSet.of(CompressionCodec.Modifier.FAST,
-                CompressionCodec.Modifier.TEXT));
-          } else {
-            result = result.modify(EnumSet.of(CompressionCodec.Modifier.DEFAULT,
-                CompressionCodec.Modifier.TEXT));
-          }
-          break;
-        case LENGTH:
-        case DICTIONARY_COUNT:
-        case PRESENT:
-        case ROW_INDEX:
-        case SECONDARY:
-          // easily compressed using the fastest modes
-          result = result.modify(EnumSet.of(CompressionCodec.Modifier.FASTEST,
-              CompressionCodec.Modifier.BINARY));
-          break;
-        default:
-          LOG.info("Missing ORC compression modifiers for " + kind);
-          break;
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Interface from the Writer to the TreeWriters. This limits the visibility
-   * that the TreeWriters have into the Writer.
-   */
-  private class StreamFactory {
-    /**
-     * Create a stream to store part of a column.
-     * @param column the column id for the stream
-     * @param kind the kind of stream
-     * @return The output outStream that the section needs to be written to.
-     */
-    public OutStream createStream(int column,
-                                  OrcProto.Stream.Kind kind
-                                  ) throws IOException {
-      final StreamName name = new StreamName(column, kind);
-      CompressionCodec codec = getCustomizedCodec(kind);
-
-      return new OutStream(physicalWriter.toString(), bufferSize, codec,
-          physicalWriter.createDataStream(name));
-    }
-
-    /**
-     * Get the next column id.
-     * @return a number from 0 to the number of columns - 1
-     */
-    public int getNextColumnId() {
-      return columnCount++;
-    }
-
-    /**
-     * Get the stride rate of the row index.
-     */
-    public int getRowIndexStride() {
-      return rowIndexStride;
-    }
-
-    /**
-     * Should be building the row index.
-     * @return true if we are building the index
-     */
-    public boolean buildIndex() {
-      return buildIndex;
-    }
-
-    /**
-     * Is the ORC file compressed?
-     * @return are the streams compressed
-     */
-    public boolean isCompressed() {
-      return physicalWriter.getCompressionCodec() != null;
-    }
-
-    /**
-     * Get the encoding strategy to use.
-     * @return encoding strategy
-     */
-    public OrcFile.EncodingStrategy getEncodingStrategy() {
-      return encodingStrategy;
-    }
-
-    /**
-     * Get the bloom filter columns
-     * @return bloom filter columns
-     */
-    public boolean[] getBloomFilterColumns() {
-      return bloomFilterColumns;
-    }
-
-    /**
-     * Get bloom filter false positive percentage.
-     * @return fpp
-     */
-    public double getBloomFilterFPP() {
-      return bloomFilterFpp;
-    }
-
-    /**
-     * Get the writer's configuration.
-     * @return configuration
-     */
-    public Configuration getConfiguration() {
-      return conf;
-    }
-
-    /**
-     * Get the version of the file to write.
-     */
-    public OrcFile.Version getVersion() {
-      return version;
-    }
-
-    public void useWriterTimeZone(boolean val) {
-      writeTimeZone = val;
-    }
-
-    public boolean hasWriterTimeZone() {
-      return writeTimeZone;
-    }
-
-    public OrcFile.BloomFilterVersion getBloomFilterVersion() {
-      return bloomFilterVersion;
-    }
-
-    public void writeIndex(StreamName name,
-                           OrcProto.RowIndex.Builder index) throws IOException {
-      physicalWriter.writeIndex(name, index, getCustomizedCodec(name.getKind()));
-    }
-
-    public void writeBloomFilter(StreamName name,
-                                 OrcProto.BloomFilterIndex.Builder bloom
-                                 ) throws IOException {
-      physicalWriter.writeBloomFilter(name, bloom,
-          getCustomizedCodec(name.getKind()));
-    }
-  }
-
-  /**
-   * The writers for the specific writers of each type. This provides
-   * the generic API that they must all implement.
-   */
-  interface TreeWriter {
-
-    /**
-     * Estimate the memory currently used to buffer the stripe.
-     * @return the number of bytes
-     */
-    long estimateMemory();
-
-    /**
-     * Estimate the memory used if the file was read into Hive's Writable
-     * types. This is used as an estimate for the query optimizer.
-     * @return the number of bytes
-     */
-    long getRawDataSize();
-
-    /**
-     * Write a VectorizedRowBath to the file. This is called by the WriterImpl
-     * at the top level.
-     * @param batch the list of all of the columns
-     * @param offset the first row from the batch to write
-     * @param length the number of rows to write
-     */
-    void writeRootBatch(VectorizedRowBatch batch, int offset,
-                        int length) throws IOException;
-
-    /**
-     * Write a ColumnVector to the file. This is called recursively by
-     * writeRootBatch.
-     * @param vector the data to write
-     * @param offset the first value offset to write.
-     * @param length the number of values to write
-     */
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException;
-
-    /**
-     * Create a row index entry at the current point in the stripe.
-     */
-    void createRowIndexEntry() throws IOException;
-
-    /**
-     * Write the stripe out to the file.
-     * @param stripeFooter the stripe footer that contains the information about the
-     *                layout of the stripe. The TreeWriterBase is required to update
-     *                the footer with its information.
-     * @param stats the stripe statistics information
-     * @param requiredIndexEntries the number of index entries that are
-     *                             required. this is to check to make sure the
-     *                             row index is well formed.
-     */
-    void writeStripe(OrcProto.StripeFooter.Builder stripeFooter,
-                     OrcProto.StripeStatistics.Builder stats,
-                     int requiredIndexEntries) throws IOException;
-
-    /**
-     * During a stripe append, we need to update the file statistics.
-     * @param stripeStatistics the statistics for the new stripe
-     */
-    void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics);
-
-    /**
-     * Add the file statistics to the file footer.
-     * @param footer the file footer builder
-     */
-    void writeFileStatistics(OrcProto.Footer.Builder footer);
-  }
-
-  /**
-   * The parent class of all of the writers for each column. Each column
-   * is written by an instance of this class. The compound types (struct,
-   * list, map, and union) have children tree writers that write the children
-   * types.
-   */
-  private abstract static class TreeWriterBase implements TreeWriter {
-    protected final int id;
-    protected final BitFieldWriter isPresent;
-    private final boolean isCompressed;
-    protected final ColumnStatisticsImpl indexStatistics;
-    protected final ColumnStatisticsImpl stripeColStatistics;
-    protected final ColumnStatisticsImpl fileStatistics;
-    protected final RowIndexPositionRecorder rowIndexPosition;
-    private final OrcProto.RowIndex.Builder rowIndex;
-    private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
-    protected final BloomFilter bloomFilter;
-    protected final BloomFilterUtf8 bloomFilterUtf8;
-    protected final boolean createBloomFilter;
-    private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex;
-    private final OrcProto.BloomFilterIndex.Builder bloomFilterIndexUtf8;
-    protected final OrcProto.BloomFilter.Builder bloomFilterEntry;
-    private boolean foundNulls;
-    private OutStream isPresentOutStream;
-    private final StreamFactory streamFactory;
-
-    /**
-     * Create a tree writer.
-     * @param columnId the column id of the column to write
-     * @param schema the row schema
-     * @param streamFactory limited access to the Writer's data.
-     * @param nullable can the value be null?
-     */
-    TreeWriterBase(int columnId,
-                   TypeDescription schema,
-                   StreamFactory streamFactory,
-                   boolean nullable) throws IOException {
-      this.streamFactory = streamFactory;
-      this.isCompressed = streamFactory.isCompressed();
-      this.id = columnId;
-      if (nullable) {
-        isPresentOutStream = streamFactory.createStream(id,
-            OrcProto.Stream.Kind.PRESENT);
-        isPresent = new BitFieldWriter(isPresentOutStream, 1);
-      } else {
-        isPresent = null;
-      }
-      this.foundNulls = false;
-      createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
-      indexStatistics = ColumnStatisticsImpl.create(schema);
-      stripeColStatistics = ColumnStatisticsImpl.create(schema);
-      fileStatistics = ColumnStatisticsImpl.create(schema);
-      if (streamFactory.buildIndex()) {
-        rowIndex = OrcProto.RowIndex.newBuilder();
-        rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
-        rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
-      } else {
-        rowIndex = null;
-        rowIndexEntry = null;
-        rowIndexPosition = null;
-      }
-      if (createBloomFilter) {
-        bloomFilterEntry = OrcProto.BloomFilter.newBuilder();
-        if (streamFactory.getBloomFilterVersion() == OrcFile.BloomFilterVersion.ORIGINAL) {
-          bloomFilter = new BloomFilter(streamFactory.getRowIndexStride(),
-              streamFactory.getBloomFilterFPP());
-          bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder();
-        } else {
-          bloomFilter = null;
-          bloomFilterIndex = null;
-        }
-        bloomFilterUtf8 = new BloomFilterUtf8(streamFactory.getRowIndexStride(),
-            streamFactory.getBloomFilterFPP());
-        bloomFilterIndexUtf8 = OrcProto.BloomFilterIndex.newBuilder();
-      } else {
-        bloomFilterEntry = null;
-        bloomFilterIndex = null;
-        bloomFilterIndexUtf8 = null;
-        bloomFilter = null;
-        bloomFilterUtf8 = null;
-      }
-    }
-
-    protected OrcProto.RowIndex.Builder getRowIndex() {
-      return rowIndex;
-    }
-
-    protected ColumnStatisticsImpl getStripeStatistics() {
-      return stripeColStatistics;
-    }
-
-    protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
-      return rowIndexEntry;
-    }
-
-    IntegerWriter createIntegerWriter(PositionedOutputStream output,
-                                      boolean signed, boolean isDirectV2,
-                                      StreamFactory writer) {
-      if (isDirectV2) {
-        boolean alignedBitpacking = false;
-        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
-          alignedBitpacking = true;
-        }
-        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
-      } else {
-        return new RunLengthIntegerWriter(output, signed);
-      }
-    }
-
-    boolean isNewWriteFormat(StreamFactory writer) {
-      return writer.getVersion() != OrcFile.Version.V_0_11;
-    }
-
-    /**
-     * Handle the top level object write.
-     *
-     * This default method is used for all types except structs, which are the
-     * typical case. VectorizedRowBatch assumes the top level object is a
-     * struct, so we use the first column for all other types.
-     * @param batch the batch to write from
-     * @param offset the row to start on
-     * @param length the number of rows to write
-     */
-    public void writeRootBatch(VectorizedRowBatch batch, int offset,
-                               int length) throws IOException {
-      writeBatch(batch.cols[0], offset, length);
-    }
-
-    /**
-     * Write the values from the given vector from offset for length elements.
-     * @param vector the vector to write from
-     * @param offset the first value from the vector to write
-     * @param length the number of values from the vector to write
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      if (vector.noNulls) {
-        indexStatistics.increment(length);
-        if (isPresent != null) {
-          for (int i = 0; i < length; ++i) {
-            isPresent.write(1);
-          }
-        }
-      } else {
-        if (vector.isRepeating) {
-          boolean isNull = vector.isNull[0];
-          if (isPresent != null) {
-            for (int i = 0; i < length; ++i) {
-              isPresent.write(isNull ? 0 : 1);
-            }
-          }
-          if (isNull) {
-            foundNulls = true;
-            indexStatistics.setNull();
-          } else {
-            indexStatistics.increment(length);
-          }
-        } else {
-          // count the number of non-null values
-          int nonNullCount = 0;
-          for(int i = 0; i < length; ++i) {
-            boolean isNull = vector.isNull[i + offset];
-            if (!isNull) {
-              nonNullCount += 1;
-            }
-            if (isPresent != null) {
-              isPresent.write(isNull ? 0 : 1);
-            }
-          }
-          indexStatistics.increment(nonNullCount);
-          if (nonNullCount != length) {
-            foundNulls = true;
-            indexStatistics.setNull();
-          }
-        }
-      }
-    }
-
-    private void removeIsPresentPositions() {
-      for(int i=0; i < rowIndex.getEntryCount(); ++i) {
-        OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
-        List<Long> positions = entry.getPositionsList();
-        // bit streams use 3 positions if uncompressed, 4 if compressed
-        positions = positions.subList(isCompressed ? 4 : 3, positions.size());
-        entry.clearPositions();
-        entry.addAllPositions(positions);
-      }
-    }
-
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      if (isPresent != null) {
-        isPresent.flush();
-
-        // if no nulls are found in a stream, then suppress the stream
-        if(!foundNulls) {
-          isPresentOutStream.suppress();
-          // since isPresent bitstream is suppressed, update the index to
-          // remove the positions of the isPresent stream
-          if (rowIndex != null) {
-            removeIsPresentPositions();
-          }
-        }
-      }
-
-      // merge stripe-level column statistics to file statistics and write it to
-      // stripe statistics
-      fileStatistics.merge(stripeColStatistics);
-      stats.addColStats(stripeColStatistics.serialize());
-      stripeColStatistics.reset();
-
-      // reset the flag for next stripe
-      foundNulls = false;
-
-      builder.addColumns(getEncoding());
-      if (streamFactory.hasWriterTimeZone()) {
-        builder.setWriterTimezone(TimeZone.getDefault().getID());
-      }
-      if (rowIndex != null) {
-        if (rowIndex.getEntryCount() != requiredIndexEntries) {
-          throw new IllegalArgumentException("Column has wrong number of " +
-               "index entries found: " + rowIndex.getEntryCount() + " expected: " +
-               requiredIndexEntries);
-        }
-        streamFactory.writeIndex(new StreamName(id, OrcProto.Stream.Kind.ROW_INDEX), rowIndex);
-        rowIndex.clear();
-        rowIndexEntry.clear();
-      }
-
-      // write the bloom filter to out stream
-      if (bloomFilterIndex != null) {
-        streamFactory.writeBloomFilter(new StreamName(id,
-            OrcProto.Stream.Kind.BLOOM_FILTER), bloomFilterIndex);
-        bloomFilterIndex.clear();
-      }
-      // write the bloom filter to out stream
-      if (bloomFilterIndexUtf8 != null) {
-        streamFactory.writeBloomFilter(new StreamName(id,
-            OrcProto.Stream.Kind.BLOOM_FILTER_UTF8), bloomFilterIndexUtf8);
-        bloomFilterIndexUtf8.clear();
-      }
-    }
-
-    /**
-     * Get the encoding for this column.
-     * @return the information about the encoding of this column
-     */
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder builder =
-          OrcProto.ColumnEncoding.newBuilder()
-              .setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      if (createBloomFilter) {
-        builder.setBloomEncoding(BloomFilterIO.Encoding.CURRENT.getId());
-      }
-      return builder;
-    }
-
-    /**
-     * Create a row index entry with the previous location and the current
-     * index statistics. Also merges the index statistics into the file
-     * statistics before they are cleared. Finally, it records the start of the
-     * next index and ensures all of the children columns also create an entry.
-     */
-    public void createRowIndexEntry() throws IOException {
-      stripeColStatistics.merge(indexStatistics);
-      rowIndexEntry.setStatistics(indexStatistics.serialize());
-      indexStatistics.reset();
-      rowIndex.addEntry(rowIndexEntry);
-      rowIndexEntry.clear();
-      addBloomFilterEntry();
-      recordPosition(rowIndexPosition);
-    }
-
-    void addBloomFilterEntry() {
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          BloomFilterIO.serialize(bloomFilterEntry, bloomFilter);
-          bloomFilterIndex.addBloomFilter(bloomFilterEntry.build());
-          bloomFilter.reset();
-        }
-        if (bloomFilterUtf8 != null) {
-          BloomFilterIO.serialize(bloomFilterEntry, bloomFilterUtf8);
-          bloomFilterIndexUtf8.addBloomFilter(bloomFilterEntry.build());
-          bloomFilterUtf8.reset();
-        }
-      }
-    }
-
-    @Override
-    public void updateFileStatistics(OrcProto.StripeStatistics stats) {
-      fileStatistics.merge(ColumnStatisticsImpl.deserialize(stats.getColStats(id)));
-    }
-
-    /**
-     * Record the current position in each of this column's streams.
-     * @param recorder where should the locations be recorded
-     */
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      if (isPresent != null) {
-        isPresent.getPosition(recorder);
-      }
-    }
-
-    /**
-     * Estimate how much memory the writer is consuming excluding the streams.
-     * @return the number of bytes.
-     */
-    public long estimateMemory() {
-      long result = 0;
-      if (isPresent != null) {
-        result = isPresentOutStream.getBufferSize();
-      }
-      return result;
-    }
-
-    @Override
-    public void writeFileStatistics(OrcProto.Footer.Builder footer) {
-      footer.addStatistics(fileStatistics.serialize());
-    }
-  }
-
-  private static class BooleanTreeWriter extends TreeWriterBase {
-    private final BitFieldWriter writer;
-
-    BooleanTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      PositionedOutputStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.writer = new BitFieldWriter(out, 1);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int value = vec.vector[0] == 0 ? 0 : 1;
-          indexStatistics.updateBoolean(value != 0, length);
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            int value = vec.vector[i + offset] == 0 ? 0 : 1;
-            writer.write(value);
-            indexStatistics.updateBoolean(value != 0, 1);
-          }
-        }
-      }
-    }
-
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      writer.flush();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + writer.estimateMemory();
-    }
-
-    @Override
-    public long getRawDataSize() {
-      long num = fileStatistics.getNumberOfValues();
-      return num * JavaDataModel.get().primitive1();
-    }
-  }
-
-  private static class ByteTreeWriter extends TreeWriterBase {
-    private final RunLengthByteWriter writer;
-
-    ByteTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.writer = new RunLengthByteWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.DATA));
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          byte value = (byte) vec.vector[0];
-          indexStatistics.updateInteger(value, length);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(value);
-            }
-            bloomFilterUtf8.addLong(value);
-          }
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            byte value = (byte) vec.vector[i + offset];
-            writer.write(value);
-            indexStatistics.updateInteger(value, 1);
-            if (createBloomFilter) {
-              if (bloomFilter != null) {
-                bloomFilter.addLong(value);
-              }
-              bloomFilterUtf8.addLong(value);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      writer.flush();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + writer.estimateMemory();
-    }
-
-    @Override
-    public long getRawDataSize() {
-      long num = fileStatistics.getNumberOfValues();
-      return num * JavaDataModel.get().primitive1();
-    }
-  }
-
-  private static class IntegerTreeWriter extends TreeWriterBase {
-    private final IntegerWriter writer;
-    private boolean isDirectV2 = true;
-    private final boolean isLong;
-
-    IntegerTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      OutStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-      this.isLong = schema.getCategory() == TypeDescription.Category.LONG;
-    }
-
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    /**
-     * Writes {@code length} entries of a long column vector, starting at
-     * {@code offset}, to the integer stream. Null entries are skipped; every
-     * value written is also folded into the index statistics and, when bloom
-     * filters are enabled, into the bloom filters.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final LongColumnVector longs = (LongColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          final int row = i + offset;
-          if (!longs.noNulls && longs.isNull[row]) {
-            continue;
-          }
-          final long current = longs.vector[row];
-          writer.write(current);
-          indexStatistics.updateInteger(current, 1);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(current);
-            }
-            bloomFilterUtf8.addLong(current);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final long repeated = longs.vector[0];
-      indexStatistics.updateInteger(repeated, length);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          bloomFilter.addLong(repeated);
-        }
-        bloomFilterUtf8.addLong(repeated);
-      }
-      for (int copy = 0; copy < length; ++copy) {
-        writer.write(repeated);
-      }
-    }
-
-    /**
-     * Lets the base class write the stripe, flushes the integer writer and,
-     * when a row index position is being tracked, records the current
-     * position in it.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      writer.flush();
-      if (rowIndexPosition == null) {
-        return;
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    /**
-     * Records the base class positions first, then the position of the
-     * integer writer, into {@code recorder}.
-     */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      // the base class positions must come before this writer's own stream
-      super.recordPosition(recorder);
-      this.writer.getPosition(recorder);
-    }
-
-    /**
-     * Returns the base class estimate plus the integer writer's estimate.
-     */
-    @Override
-    public long estimateMemory() {
-      final long inherited = super.estimateMemory();
-      return inherited + writer.estimateMemory();
-    }
-
-    /**
-     * Returns the number of values in the file statistics multiplied by the
-     * data model's per-value size: {@code primitive2()} for LONG columns,
-     * {@code primitive1()} for every other category handled by this writer.
-     */
-    @Override
-    public long getRawDataSize() {
-      final JavaDataModel model = JavaDataModel.get();
-      final long valueCount = fileStatistics.getNumberOfValues();
-      if (isLong) {
-        return valueCount * model.primitive2();
-      }
-      return valueCount * model.primitive1();
-    }
-  }
-
-  /**
-   * Writes a float column. The values arrive in a DoubleColumnVector and are
-   * narrowed to float before being written to the DATA stream.
-   */
-  private static class FloatTreeWriter extends TreeWriterBase {
-    private final PositionedOutputStream stream;
-    private final SerializationUtils utils;
-
-    FloatTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      this.utils = new SerializationUtils();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Writes {@code length} entries starting at {@code offset}. Null entries
-     * are skipped. For a repeating vector the statistics and bloom filters
-     * are updated once with the repeated value and the value is then written
-     * {@code length} times.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final DoubleColumnVector doubles = (DoubleColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          final int row = i + offset;
-          if (!doubles.noNulls && doubles.isNull[row]) {
-            continue;
-          }
-          // narrow first so that statistics see the value that is stored
-          final float current = (float) doubles.vector[row];
-          utils.writeFloat(stream, current);
-          indexStatistics.updateDouble(current);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addDouble(current);
-            }
-            bloomFilterUtf8.addDouble(current);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final float repeated = (float) doubles.vector[0];
-      indexStatistics.updateDouble(repeated);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          bloomFilter.addDouble(repeated);
-        }
-        bloomFilterUtf8.addDouble(repeated);
-      }
-      for (int copy = 0; copy < length; ++copy) {
-        utils.writeFloat(stream, repeated);
-      }
-    }
-
-    /**
-     * Lets the base class write the stripe, flushes the DATA stream and
-     * records the new position when a row index position is tracked.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      stream.flush();
-      if (rowIndexPosition == null) {
-        return;
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    /** Records the base class positions followed by the DATA stream's. */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      this.stream.getPosition(recorder);
-    }
-
-    /** Returns the base class estimate plus the DATA stream buffer size. */
-    @Override
-    public long estimateMemory() {
-      final long inherited = super.estimateMemory();
-      return inherited + stream.getBufferSize();
-    }
-
-    /**
-     * Returns the number of values in the file statistics multiplied by the
-     * data model's {@code primitive1()} size.
-     */
-    @Override
-    public long getRawDataSize() {
-      final long valueCount = fileStatistics.getNumberOfValues();
-      return valueCount * JavaDataModel.get().primitive1();
-    }
-  }
-
-  /**
-   * Writes a double column: the values of a DoubleColumnVector go to the
-   * DATA stream unchanged.
-   */
-  private static class DoubleTreeWriter extends TreeWriterBase {
-    private final PositionedOutputStream stream;
-    private final SerializationUtils utils;
-
-    DoubleTreeWriter(int columnId,
-                    TypeDescription schema,
-                    StreamFactory writer,
-                    boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      this.utils = new SerializationUtils();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Writes {@code length} entries starting at {@code offset}. Null entries
-     * are skipped. For a repeating vector the statistics and bloom filters
-     * are updated once with the repeated value and the value is then written
-     * {@code length} times.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final DoubleColumnVector doubles = (DoubleColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          final int row = i + offset;
-          if (!doubles.noNulls && doubles.isNull[row]) {
-            continue;
-          }
-          final double current = doubles.vector[row];
-          utils.writeDouble(stream, current);
-          indexStatistics.updateDouble(current);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addDouble(current);
-            }
-            bloomFilterUtf8.addDouble(current);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final double repeated = doubles.vector[0];
-      indexStatistics.updateDouble(repeated);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          bloomFilter.addDouble(repeated);
-        }
-        bloomFilterUtf8.addDouble(repeated);
-      }
-      for (int copy = 0; copy < length; ++copy) {
-        utils.writeDouble(stream, repeated);
-      }
-    }
-
-    /**
-     * Lets the base class write the stripe, flushes the DATA stream and
-     * records the new position when a row index position is tracked.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      stream.flush();
-      if (rowIndexPosition == null) {
-        return;
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    /** Records the base class positions followed by the DATA stream's. */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      this.stream.getPosition(recorder);
-    }
-
-    /** Returns the base class estimate plus the DATA stream buffer size. */
-    @Override
-    public long estimateMemory() {
-      final long inherited = super.estimateMemory();
-      return inherited + stream.getBufferSize();
-    }
-
-    /**
-     * Returns the number of values in the file statistics multiplied by the
-     * data model's {@code primitive2()} size.
-     */
-    @Override
-    public long getRawDataSize() {
-      final long valueCount = fileStatistics.getNumberOfValues();
-      return valueCount * JavaDataModel.get().primitive2();
-    }
-  }
-
-  /**
-   * Common base of the string, char and varchar writers. Values are first
-   * collected in a dictionary together with one dictionary id per row; the
-   * buffered rows are written out by flushDictionary(), either as dictionary
-   * ids or, once dictionary encoding has been turned off, as direct bytes
-   * plus lengths.
-   */
-  private static abstract class StringBaseTreeWriter extends TreeWriterBase {
-    private static final int INITIAL_DICTIONARY_SIZE = 4096;
-    // DICTIONARY_DATA stream: the bytes of the dictionary entries
-    private final OutStream stringOutput;
-    // LENGTH stream: entry lengths (dictionary) or value lengths (direct)
-    protected final IntegerWriter lengthOutput;
-    // integer writer layered on the DATA stream, used for dictionary ids
-    private final IntegerWriter rowOutput;
-    protected final StringRedBlackTree dictionary =
-        new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
-    // dictionary id of every buffered (non-null) row, in arrival order
-    protected final DynamicIntArray rows = new DynamicIntArray();
-    // DATA stream: receives the raw bytes when direct encoding is used
-    protected final PositionedOutputStream directStreamOutput;
-    // row index entries whose stream positions are filled in at flush time
-    private final List<OrcProto.RowIndexEntry> savedRowIndex =
-        new ArrayList<>();
-    private final boolean buildIndex;
-    // value of rows.size() at the start of each row index entry; always
-    // begins with 0
-    private final List<Long> rowIndexValueCount = new ArrayList<>();
-    // If the number of keys in a dictionary is greater than this fraction of
-    // the total number of non-null rows, turn off dictionary encoding
-    private final double dictionaryKeySizeThreshold;
-    protected boolean useDictionaryEncoding = true;
-    private boolean isDirectV2 = true;
-    // set once checkDictionaryEncoding() has made its decision
-    private boolean doneDictionaryCheck;
-    // when true the dictionary check runs at the first row index entry
-    // instead of waiting for the stripe to be written
-    private final boolean strideDictionaryCheck;
-
-    StringBaseTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      stringOutput = writer.createStream(id,
-          OrcProto.Stream.Kind.DICTIONARY_DATA);
-      lengthOutput = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      rowOutput = createIntegerWriter(directStreamOutput, false, isDirectV2,
-          writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-      // the first row index entry starts at buffered row 0
-      rowIndexValueCount.add(0L);
-      buildIndex = writer.buildIndex();
-      Configuration conf = writer.getConfiguration();
-      dictionaryKeySizeThreshold =
-          OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
-      strideDictionaryCheck =
-          OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
-      doneDictionaryCheck = false;
-    }
-
-    /**
-     * Decides, at most once, whether dictionary encoding stays enabled.
-     * Writers not using the new write format always keep the dictionary.
-     */
-    private void checkDictionaryEncoding() {
-      if (!doneDictionaryCheck) {
-        // Set the flag indicating whether or not to use dictionary encoding
-        // based on whether or not the fraction of distinct keys over number of
-        // non-null rows is less than the configured threshold
-        float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f;
-        useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold;
-        doneDictionaryCheck = true;
-      }
-    }
-
-    /**
-     * Flushes the buffered rows, writes the stripe through the base class,
-     * flushes the streams used by the chosen encoding and resets the
-     * per-stripe state.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      // if rows in stripe is less than dictionaryCheckAfterRows, dictionary
-      // checking would not have happened. So do it again here.
-      checkDictionaryEncoding();
-
-      if (useDictionaryEncoding) {
-        flushDictionary();
-      } else {
-        // flush out any left over entries from dictionary
-        if (rows.size() > 0) {
-          flushDictionary();
-        }
-
-        // suppress the stream for every stripe if dictionary is disabled
-        stringOutput.suppress();
-      }
-
-      // we need to build the rowindex before calling super, since it
-      // writes it out.
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      if (useDictionaryEncoding) {
-        stringOutput.flush();
-        lengthOutput.flush();
-        rowOutput.flush();
-      } else {
-        directStreamOutput.flush();
-        lengthOutput.flush();
-      }
-      // reset all of the fields to be ready for the next stripe.
-      dictionary.clear();
-      savedRowIndex.clear();
-      rowIndexValueCount.clear();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-      rowIndexValueCount.add(0L);
-
-      if (!useDictionaryEncoding) {
-        // record the start positions of first index stride of next stripe i.e
-        // beginning of the direct streams when dictionary is disabled
-        recordDirectStreamPosition();
-      }
-    }
-
-    /**
-     * Writes out all buffered rows, finalizing the saved row index entries
-     * with stream positions along the way, and then clears the row buffer.
-     * With dictionary encoding the dictionary itself is written first and
-     * each row becomes its id in the sorted dictionary; otherwise each row's
-     * bytes and length are written directly.
-     */
-    private void flushDictionary() throws IOException {
-      // maps an entry's original (insertion) position to its sorted position
-      final int[] dumpOrder = new int[dictionary.size()];
-
-      if (useDictionaryEncoding) {
-        // Write the dictionary by traversing the red-black tree writing out
-        // the bytes and lengths; and creating the map from the original order
-        // to the final sorted order.
-
-        dictionary.visit(new StringRedBlackTree.Visitor() {
-          private int currentId = 0;
-          @Override
-          public void visit(StringRedBlackTree.VisitorContext context
-                           ) throws IOException {
-            context.writeBytes(stringOutput);
-            lengthOutput.write(context.getLength());
-            dumpOrder[context.getOriginalPosition()] = currentId++;
-          }
-        });
-      } else {
-        // for direct encoding, we don't want the dictionary data stream
-        stringOutput.suppress();
-      }
-      int length = rows.size();
-      int rowIndexEntry = 0;
-      OrcProto.RowIndex.Builder rowIndex = getRowIndex();
-      Text text = new Text();
-      // write the values translated into the dump order.
-      // the loop runs one step past the last row (i == length) so that index
-      // entries positioned at the very end are finalized as well
-      for(int i = 0; i <= length; ++i) {
-        // now that we are writing out the row values, we can finalize the
-        // row index
-        if (buildIndex) {
-          // NOTE(review): rowIndexValueCount is read before rowIndexEntry is
-          // bounds-checked against savedRowIndex; this appears to rely on
-          // rowIndexValueCount holding one more element than savedRowIndex
-          while (i == rowIndexValueCount.get(rowIndexEntry) &&
-              rowIndexEntry < savedRowIndex.size()) {
-            OrcProto.RowIndexEntry.Builder base =
-                savedRowIndex.get(rowIndexEntry++).toBuilder();
-            if (useDictionaryEncoding) {
-              rowOutput.getPosition(new RowIndexPositionRecorder(base));
-            } else {
-              PositionRecorder posn = new RowIndexPositionRecorder(base);
-              directStreamOutput.getPosition(posn);
-              lengthOutput.getPosition(posn);
-            }
-            rowIndex.addEntry(base.build());
-          }
-        }
-        if (i != length) {
-          if (useDictionaryEncoding) {
-            rowOutput.write(dumpOrder[rows.get(i)]);
-          } else {
-            // look the value up again and write its bytes and length directly
-            dictionary.getText(text, rows.get(i));
-            directStreamOutput.write(text.getBytes(), 0, text.getLength());
-            lengthOutput.write(text.getLength());
-          }
-        }
-      }
-      rows.clear();
-    }
-
-    /**
-     * Reports DICTIONARY/DICTIONARY_V2 (with the dictionary size) while
-     * dictionary encoding is in use and DIRECT/DIRECT_V2 otherwise; the V2
-     * kinds are chosen when the new write format is in use.
-     */
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (useDictionaryEncoding) {
-        result.setDictionarySize(dictionary.size());
-        if(isDirectV2) {
-          result.setKind(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2);
-        } else {
-          result.setKind(OrcProto.ColumnEncoding.Kind.DICTIONARY);
-        }
-      } else {
-        if(isDirectV2) {
-          result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-        } else {
-          result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-        }
-      }
-      return result;
-    }
-
-    /**
-     * This method doesn't call the super method, because unlike most of the
-     * other TreeWriters, this one can't record the position in the streams
-     * until the stripe is being flushed. Therefore it saves all of the entries
-     * and augments them with the final information as the stripe is written.
-     */
-    @Override
-    public void createRowIndexEntry() throws IOException {
-      getStripeStatistics().merge(indexStatistics);
-      OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
-      rowIndexEntry.setStatistics(indexStatistics.serialize());
-      indexStatistics.reset();
-      OrcProto.RowIndexEntry base = rowIndexEntry.build();
-      savedRowIndex.add(base);
-      rowIndexEntry.clear();
-      addBloomFilterEntry();
-      recordPosition(rowIndexPosition);
-      // remember how many rows were buffered when this entry was closed
-      rowIndexValueCount.add((long) rows.size());
-      if (strideDictionaryCheck) {
-        checkDictionaryEncoding();
-      }
-      if (!useDictionaryEncoding) {
-        if (rows.size() > 0) {
-          // write the buffered rows now; flushDictionary() also adds the
-          // saved index entries to the row index
-          flushDictionary();
-          // just record the start positions of next index stride
-          recordDirectStreamPosition();
-        } else {
-          // record the start positions of next index stride
-          recordDirectStreamPosition();
-          getRowIndex().addEntry(base);
-        }
-      }
-    }
-
-    /**
-     * Records the positions of the direct DATA stream and the LENGTH writer
-     * in the row index position, when one is being tracked.
-     */
-    private void recordDirectStreamPosition() throws IOException {
-      if (rowIndexPosition != null) {
-        directStreamOutput.getPosition(rowIndexPosition);
-        lengthOutput.getPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Adds to the base class estimate either the dictionary and row buffer
-     * sizes (dictionary encoding) or the LENGTH writer and DATA stream
-     * buffer sizes (direct encoding).
-     */
-    @Override
-    public long estimateMemory() {
-      long parent = super.estimateMemory();
-      if (useDictionaryEncoding) {
-        return parent + dictionary.getSizeInBytes() + rows.getSizeInBytes();
-      } else {
-        return parent + lengthOutput.estimateMemory() +
-            directStreamOutput.getBufferSize();
-      }
-    }
-
-    /**
-     * Estimates the raw size as the number of values times the data model's
-     * size for a string of the average length; returns 0 when there are no
-     * values.
-     */
-    @Override
-    public long getRawDataSize() {
-      // ORC strings are converted to java Strings. so use JavaDataModel to
-      // compute the overall size of strings
-      StringColumnStatistics scs = (StringColumnStatistics) fileStatistics;
-      long numVals = fileStatistics.getNumberOfValues();
-      if (numVals == 0) {
-        return 0;
-      } else {
-        // integer division: the average length is truncated
-        int avgSize = (int) (scs.getSum() / numVals);
-        return numVals * JavaDataModel.get().lengthForStringOfLength(avgSize);
-      }
-    }
-  }
-
-  /**
-   * Writer for string columns: every non-null value is stored exactly as it
-   * appears in the BytesColumnVector.
-   */
-  private static class StringTreeWriter extends StringBaseTreeWriter {
-    StringTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-    }
-
-    /**
-     * Adds {@code length} entries starting at {@code offset}. Depending on
-     * the current encoding a value is either added to the dictionary (and
-     * its id buffered) or written straight to the DATA and LENGTH streams.
-     * Null entries are skipped.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final BytesColumnVector bytes = (BytesColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          if (!bytes.noNulls && bytes.isNull[i + offset]) {
-            continue;
-          }
-          final int row = offset + i;
-          final byte[] buffer = bytes.vector[row];
-          final int start = bytes.start[row];
-          final int size = bytes.length[row];
-          if (useDictionaryEncoding) {
-            rows.add(dictionary.add(buffer, start, size));
-          } else {
-            directStreamOutput.write(buffer, start, size);
-            lengthOutput.write(size);
-          }
-          indexStatistics.updateString(buffer, start, size, 1);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              // translate from UTF-8 to the default charset
-              bloomFilter.addString(
-                  new String(buffer, start, size, StandardCharsets.UTF_8));
-            }
-            bloomFilterUtf8.addBytes(buffer, start, size);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final byte[] buffer = bytes.vector[0];
-      final int start = bytes.start[0];
-      final int size = bytes.length[0];
-      if (useDictionaryEncoding) {
-        final int dictionaryId = dictionary.add(buffer, start, size);
-        for (int copy = 0; copy < length; ++copy) {
-          rows.add(dictionaryId);
-        }
-      } else {
-        for (int copy = 0; copy < length; ++copy) {
-          directStreamOutput.write(buffer, start, size);
-          lengthOutput.write(size);
-        }
-      }
-      indexStatistics.updateString(buffer, start, size, length);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          // translate from UTF-8 to the default charset
-          bloomFilter.addString(
-              new String(buffer, start, size, StandardCharsets.UTF_8));
-        }
-        bloomFilterUtf8.addBytes(buffer, start, size);
-      }
-    }
-  }
-
-  /**
-   * Under the covers, char is written to ORC the same way as string.
-   * Every value is normalized to exactly {@code itemLength} bytes before it
-   * is stored: shorter values are padded with spaces, longer values are cut
-   * off. The dictionary, the direct streams, the index statistics and the
-   * bloom filters all receive the same normalized bytes, so a bloom filter
-   * lookup for a stored value cannot miss.
-   */
-  private static class CharTreeWriter extends StringBaseTreeWriter {
-    // number of bytes stored for every value (the schema's max length)
-    private final int itemLength;
-    // scratch buffer, reused for every value that needs space padding
-    private final byte[] padding;
-
-    CharTreeWriter(int columnId,
-        TypeDescription schema,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      itemLength = schema.getMaxLength();
-      padding = new byte[itemLength];
-    }
-
-    /**
-     * Writes {@code length} entries starting at {@code offset}; null entries
-     * are skipped. A repeating vector is handled by writing slot 0
-     * {@code length} times.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      BytesColumnVector vec = (BytesColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          writePadded(vec, 0, length);
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            writePadded(vec, i + offset, 1);
-          }
-        }
-      }
-    }
-
-    /**
-     * Normalizes the value in slot {@code row} to {@code itemLength} bytes
-     * and records it {@code repeats} times.
-     */
-    private void writePadded(BytesColumnVector vec, int row,
-                             int repeats) throws IOException {
-      byte[] ptr;
-      int ptrOffset;
-      if (vec.length[row] >= itemLength) {
-        // long enough: use the first itemLength bytes in place
-        ptr = vec.vector[row];
-        ptrOffset = vec.start[row];
-      } else {
-        // it is the wrong length, so copy it and pad with spaces
-        ptr = padding;
-        ptrOffset = 0;
-        System.arraycopy(vec.vector[row], vec.start[row], ptr, 0,
-            vec.length[row]);
-        Arrays.fill(ptr, vec.length[row], itemLength, (byte) ' ');
-      }
-      if (useDictionaryEncoding) {
-        int id = dictionary.add(ptr, ptrOffset, itemLength);
-        for(int i=0; i < repeats; ++i) {
-          rows.add(id);
-        }
-      } else {
-        for(int i=0; i < repeats; ++i) {
-          directStreamOutput.write(ptr, ptrOffset, itemLength);
-          lengthOutput.write(itemLength);
-        }
-      }
-      indexStatistics.updateString(ptr, ptrOffset, itemLength, repeats);
-      if (createBloomFilter) {
-        // the bloom filters must see the stored (normalized) bytes, not the
-        // raw input, otherwise a lookup for the stored value can miss
-        if (bloomFilter != null) {
-          // translate from UTF-8 to the default charset
-          bloomFilter.addString(new String(ptr, ptrOffset, itemLength,
-              StandardCharsets.UTF_8));
-        }
-        bloomFilterUtf8.addBytes(ptr, ptrOffset, itemLength);
-      }
-    }
-  }
-
-  /**
-   * Under the covers, varchar is written to ORC the same way as string,
-   * except that every value is limited to {@code maxLength} bytes.
-   */
-  private static class VarcharTreeWriter extends StringBaseTreeWriter {
-    private final int maxLength;
-
-    VarcharTreeWriter(int columnId,
-        TypeDescription schema,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      maxLength = schema.getMaxLength();
-    }
-
-    /**
-     * Adds {@code length} entries starting at {@code offset}, cutting each
-     * value off after {@code maxLength} bytes. Null entries are skipped.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final BytesColumnVector bytes = (BytesColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          if (!bytes.noNulls && bytes.isNull[i + offset]) {
-            continue;
-          }
-          final int row = offset + i;
-          final byte[] buffer = bytes.vector[row];
-          final int start = bytes.start[row];
-          final int size = Math.min(bytes.length[row], maxLength);
-          if (useDictionaryEncoding) {
-            rows.add(dictionary.add(buffer, start, size));
-          } else {
-            directStreamOutput.write(buffer, start, size);
-            lengthOutput.write(size);
-          }
-          indexStatistics.updateString(buffer, start, size, 1);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              // translate from UTF-8 to the default charset
-              bloomFilter.addString(
-                  new String(buffer, start, size, StandardCharsets.UTF_8));
-            }
-            bloomFilterUtf8.addBytes(buffer, start, size);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final byte[] buffer = bytes.vector[0];
-      final int start = bytes.start[0];
-      final int size = Math.min(bytes.length[0], maxLength);
-      if (useDictionaryEncoding) {
-        final int dictionaryId = dictionary.add(buffer, start, size);
-        for (int copy = 0; copy < length; ++copy) {
-          rows.add(dictionaryId);
-        }
-      } else {
-        for (int copy = 0; copy < length; ++copy) {
-          directStreamOutput.write(buffer, start, size);
-          lengthOutput.write(size);
-        }
-      }
-      indexStatistics.updateString(buffer, start, size, length);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          // translate from UTF-8 to the default charset
-          bloomFilter.addString(
-              new String(buffer, start, size, StandardCharsets.UTF_8));
-        }
-        bloomFilterUtf8.addBytes(buffer, start, size);
-      }
-    }
-  }
-
-  /**
-   * Writes a binary column: the bytes of every non-null value go to the DATA
-   * stream and its length goes to the LENGTH stream.
-   */
-  private static class BinaryTreeWriter extends TreeWriterBase {
-    private final PositionedOutputStream stream;
-    private final IntegerWriter length;
-    private boolean isDirectV2 = true;
-
-    BinaryTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.length = createIntegerWriter(
-          writer.createStream(id, OrcProto.Stream.Kind.LENGTH),
-          false, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Reports DIRECT_V2 when the new write format is in use and DIRECT
-     * otherwise.
-     */
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      final OrcProto.ColumnEncoding.Builder encoding = super.getEncoding();
-      final OrcProto.ColumnEncoding.Kind kind = isDirectV2
-          ? OrcProto.ColumnEncoding.Kind.DIRECT_V2
-          : OrcProto.ColumnEncoding.Kind.DIRECT;
-      encoding.setKind(kind);
-      return encoding;
-    }
-
-    /**
-     * Writes {@code length} entries starting at {@code offset}; null entries
-     * are skipped. Note that the {@code length} parameter hides the LENGTH
-     * writer field, which is therefore reached through {@code this}.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final BytesColumnVector bytes = (BytesColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          if (!bytes.noNulls && bytes.isNull[i + offset]) {
-            continue;
-          }
-          final int row = offset + i;
-          final byte[] buffer = bytes.vector[row];
-          final int start = bytes.start[row];
-          final int size = bytes.length[row];
-          stream.write(buffer, start, size);
-          this.length.write(size);
-          indexStatistics.updateBinary(buffer, start, size, 1);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addBytes(buffer, start, size);
-            }
-            bloomFilterUtf8.addBytes(buffer, start, size);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final byte[] buffer = bytes.vector[0];
-      final int start = bytes.start[0];
-      final int size = bytes.length[0];
-      for (int copy = 0; copy < length; ++copy) {
-        stream.write(buffer, start, size);
-        this.length.write(size);
-      }
-      indexStatistics.updateBinary(buffer, start, size, length);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          bloomFilter.addBytes(buffer, start, size);
-        }
-        bloomFilterUtf8.addBytes(buffer, start, size);
-      }
-    }
-
-    /**
-     * Lets the base class write the stripe, flushes both streams and records
-     * the new position when a row index position is tracked.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      stream.flush();
-      length.flush();
-      if (rowIndexPosition == null) {
-        return;
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    /**
-     * Records the base class positions, then the DATA stream's, then the
-     * LENGTH writer's.
-     */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      this.stream.getPosition(recorder);
-      this.length.getPosition(recorder);
-    }
-
-    /**
-     * Returns the base class estimate plus the DATA stream buffer size and
-     * the LENGTH writer's estimate.
-     */
-    @Override
-    public long estimateMemory() {
-      final long inherited = super.estimateMemory();
-      return inherited + stream.getBufferSize() + length.estimateMemory();
-    }
-
-    /** Returns the sum reported by the binary file statistics. */
-    @Override
-    public long getRawDataSize() {
-      // get total length of binary blob
-      final BinaryColumnStatistics binaryStats =
-          (BinaryColumnStatistics) fileStatistics;
-      return binaryStats.getSum();
-    }
-  }
-
-  // Number of milliseconds in one second; TimestampTreeWriter divides epoch
-  // milliseconds by it to obtain seconds.
-  public static final int MILLIS_PER_SECOND = 1000;
-  // Base date-time that TimestampTreeWriter parses with Timestamp.valueOf();
-  // the resulting seconds are subtracted from every timestamp it writes.
-  public static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
-
-  private static class TimestampTreeWriter extends TreeWriterBase {
-    private final IntegerWriter seconds;
-    private final IntegerWriter nanos;
-    private final boolean isDirectV2;
-    private final TimeZone localTimezone;
-    private final long baseEpochSecsLocalTz;
-
-    TimestampTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.seconds = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
-      this.nanos = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-      this.localTimezone = TimeZone.getDefault();
-      // for unit tests to set different time zones
-      this.baseEpochSecsLocalTz = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
-      writer.useWriterTimeZone(true);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    /**
-     * Writes {@code length} entries of a timestamp vector starting at
-     * {@code offset}; null entries are skipped. For each value the seconds
-     * relative to the base timestamp go to the seconds writer and the
-     * formatted nanoseconds to the nanos writer. The index statistics and
-     * the UTF-8 bloom filter receive the value converted to UTC, while the
-     * original bloom filter receives the unconverted milliseconds.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      final TimestampColumnVector timestamps = (TimestampColumnVector) vector;
-
-      if (!vector.isRepeating) {
-        for (int i = 0; i < length; ++i) {
-          final int row = i + offset;
-          if (!timestamps.noNulls && timestamps.isNull[row]) {
-            continue;
-          }
-          final Timestamp current = timestamps.asScratchTimestamp(row);
-          final long millis = current.getTime();
-          final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz;
-          final long utc =
-              SerializationUtils.convertToUtc(localTimezone, millis);
-          seconds.write(secs);
-          nanos.write(formatNanos(current.getNanos()));
-          indexStatistics.updateTimestamp(utc);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(millis);
-            }
-            bloomFilterUtf8.addLong(utc);
-          }
-        }
-        return;
-      }
-
-      // a repeating vector keeps its single value (or null flag) in slot 0
-      if (!vector.noNulls && vector.isNull[0]) {
-        return;
-      }
-      final Timestamp repeated = timestamps.asScratchTimestamp(0);
-      final long millis = repeated.getTime();
-      final long utc = SerializationUtils.convertToUtc(localTimezone, millis);
-      indexStatistics.updateTimestamp(utc);
-      if (createBloomFilter) {
-        if (bloomFilter != null) {
-          bloomFilter.addLong(millis);
-        }
-        bloomFilterUtf8.addLong(utc);
-      }
-      // compute the encoded pair once and emit it for every repetition
-      final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz;
-      final long nano = formatNanos(repeated.getNanos());
-      for (int copy = 0; copy < length; ++copy) {
-        seconds.write(secs);
-        nanos.write(nano);
-      }
-    }
-
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      seconds.flush();
-      nanos.flush();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    private static long formatNanos(int nanos) {
-      if (nanos == 0) {
-        return 0;
-      } else if (nanos % 100 != 0) {
-        return ((long) nanos) << 3;
-      } else {
-        nanos /= 100;
-        int trailingZeros = 1;
-        while (nanos % 10 == 0 && trailingZeros < 7) {
-          nanos /= 10;
-          trailingZeros += 1;
-        }
-        return ((long) nanos) << 3 | trailingZeros;
-      }
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      seconds.getPosition(recorder);
-      nanos.getPosition(recorder);
-    }
-
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + seconds.estimateMemory() +
-          nanos.estimateMemory();
-    }
-
-    @Override
-    public long getRawDataSize() {
-      return fileStatistics.getNumberOfValues() *
-          JavaDataModel.get().lengthOfTimestamp();
-    }
-  }
-
-  /**
-   * Tree writer for date columns. Each non-null value of the incoming
-   * LongColumnVector is narrowed to an int and written to the DATA stream.
-   */
-  private static class DateTreeWriter extends TreeWriterBase {
-    private final IntegerWriter writer;
-    private final boolean isDirectV2;
-
-    /**
-     * Creates the DATA stream and its integer writer and, when
-     * rowIndexPosition is non-null, records the initial stream position.
-     */
-    DateTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      OutStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Writes rows [offset, offset + length) of a LongColumnVector. Every
-     * value is cast to int before it reaches the writer, the index
-     * statistics and the bloom filters. Null rows emit nothing here.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        // a repeating vector keeps its single value at index 0
-        if (vector.noNulls || !vector.isNull[0]) {
-          int value = (int) vec.vector[0];
-          // statistics and bloom filters are updated once for the whole run
-          indexStatistics.updateDate(value);
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(value);
-            }
-            bloomFilterUtf8.addLong(value);
-          }
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            int value = (int) vec.vector[i + offset];
-            writer.write(value);
-            indexStatistics.updateDate(value);
-            if (createBloomFilter) {
-              if (bloomFilter != null) {
-                bloomFilter.addLong(value);
-              }
-              bloomFilterUtf8.addLong(value);
-            }
-          }
-        }
-      }
-    }
-
-    /**
-     * Delegates to the base class, flushes the integer writer and then
-     * records the stream position again when rowIndexPosition is non-null.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      writer.flush();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Records the base class position followed by the writer position.
-     */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-
-    /**
-     * Reports DIRECT_V2 when isDirectV2 is set and DIRECT otherwise.
-     */
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    /**
-     * Sums the base class estimate with the writer's estimate.
-     */
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + writer.estimateMemory();
-    }
-
-    /**
-     * Number of values in the file statistics multiplied by
-     * JavaDataModel's lengthOfDate().
-     */
-    @Override
-    public long getRawDataSize() {
-      return fileStatistics.getNumberOfValues() *
-          JavaDataModel.get().lengthOfDate();
-    }
-  }
-
-  /**
-   * Tree writer for decimal columns. Each non-null HiveDecimalWritable is
-   * serialized onto the DATA stream and its scale is written to the
-   * SECONDARY stream.
-   */
-  private static class DecimalTreeWriter extends TreeWriterBase {
-    private final PositionedOutputStream valueStream;
-
-    // These scratch buffers allow us to serialize decimals much faster.
-    private final long[] scratchLongs;
-    private final byte[] scratchBuffer;
-
-    private final IntegerWriter scaleStream;
-    private final boolean isDirectV2;
-
-    /**
-     * Creates the DATA stream, the scratch buffers and the SECONDARY integer
-     * writer and, when rowIndexPosition is non-null, records the initial
-     * stream positions.
-     */
-    DecimalTreeWriter(int columnId,
-                        TypeDescription schema,
-                        StreamFactory writer,
-                        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      scratchLongs = new long[HiveDecimal.SCRATCH_LONGS_LEN];
-      scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
-      this.scaleStream = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.SECONDARY), true, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Reports DIRECT_V2 when isDirectV2 is set and DIRECT otherwise.
-     */
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    /**
-     * Writes rows [offset, offset + length) of a DecimalColumnVector. The
-     * bloom filters receive the string produced by
-     * value.toString(scratchBuffer). Null rows emit nothing here.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      DecimalColumnVector vec = (DecimalColumnVector) vector;
-      if (vector.isRepeating) {
-        // a repeating vector keeps its single value at index 0
-        if (vector.noNulls || !vector.isNull[0]) {
-          HiveDecimalWritable value = vec.vector[0];
-          // statistics and bloom filters are updated once for the whole run
-          indexStatistics.updateDecimal(value);
-          if (createBloomFilter) {
-            String str = value.toString(scratchBuffer);
-            if (bloomFilter != null) {
-              bloomFilter.addString(str);
-            }
-            bloomFilterUtf8.addString(str);
-          }
-          // the value and its scale are still emitted once per row
-          for(int i=0; i < length; ++i) {
-            value.serializationUtilsWrite(valueStream,
-                                          scratchLongs);
-            scaleStream.write(value.scale());
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            HiveDecimalWritable value = vec.vector[i + offset];
-            value.serializationUtilsWrite(valueStream, scratchLongs);
-            scaleStream.write(value.scale());
-            indexStatistics.updateDecimal(value);
-            if (createBloomFilter) {
-              String str = value.toString(scratchBuffer);
-              if (bloomFilter != null) {
-                bloomFilter.addString(str);
-              }
-              bloomFilterUtf8.addString(str);
-            }
-          }
-        }
-      }
-    }
-
-    /**
-     * Delegates to the base class, flushes the value and scale streams and
-     * then records their positions again when rowIndexPosition is non-null.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      valueStream.flush();
-      scaleStream.flush();
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Records the base class position followed by the positions of the value
-     * stream and the scale stream, in that order.
-     */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      valueStream.getPosition(recorder);
-      scaleStream.getPosition(recorder);
-    }
-
-    /**
-     * Sums the base class estimate, the value stream's buffer size and the
-     * scale writer's estimate.
-     */
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + valueStream.getBufferSize() +
-          scaleStream.estimateMemory();
-    }
-
-    /**
-     * Number of values in the file statistics multiplied by
-     * JavaDataModel's lengthOfDecimal().
-     */
-    @Override
-    public long getRawDataSize() {
-      return fileStatistics.getNumberOfValues() *
-          JavaDataModel.get().lengthOfDecimal();
-    }
-  }
-
-  /**
-   * Tree writer for struct columns. It owns one child writer per field of
-   * the schema and forwards batches, row index entries, stripe writes and
-   * statistics calls to every child.
-   */
-  private static class StructTreeWriter extends TreeWriterBase {
-    final TreeWriter[] childrenWriters;
-
-    /**
-     * Builds a child writer for each child of the schema via
-     * createTreeWriter and, when rowIndexPosition is non-null, records the
-     * initial position.
-     */
-    StructTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      List<TypeDescription> children = schema.getChildren();
-      childrenWriters = new TreeWriterBase[children.size()];
-      for(int i=0; i < childrenWriters.length; ++i) {
-        childrenWriters[i] = createTreeWriter(
-          children.get(i), writer,
-          true);
-      }
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Writes a batch whose columns map one-to-one onto the child writers:
-     * batch.cols[i] is handed to childrenWriters[i].
-     */
-    @Override
-    public void writeRootBatch(VectorizedRowBatch batch, int offset,
-                               int length) throws IOException {
-      // update the statistics for the root column
-      indexStatistics.increment(length);
-      // I'm assuming that the root column isn't nullable so that I don't need
-      // to update isPresent.
-      for(int i=0; i < childrenWriters.length; ++i) {
-        childrenWriters[i].writeBatch(batch.cols[i], offset, length);
-      }
-    }
-
-    /**
-     * Hands rows [offset, offset + length) of every field vector to the
-     * child writer with the same index.
-     */
-    private static void writeFields(StructColumnVector vector,
-                                    TreeWriter[] childrenWriters,
-                                    int offset, int length) throws IOException {
-      for(int field=0; field < childrenWriters.length; ++field) {
-        childrenWriters[field].writeBatch(vector.fields[field], offset, length);
-      }
-    }
-
-    /**
-     * Writes rows [offset, offset + length) of a StructColumnVector. Field
-     * values are forwarded to the children only for non-null struct rows;
-     * when nulls are present the rows are forwarded in runs of consecutive
-     * non-null rows.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      StructColumnVector vec = (StructColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          writeFields(vec, childrenWriters, offset, length);
-        }
-      } else if (vector.noNulls) {
-        writeFields(vec, childrenWriters, offset, length);
-      } else {
-        // write the records in runs
-        // currentRun is the index, relative to offset, where the open run began
-        int currentRun = 0;
-        boolean started = false;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            if (!started) {
-              started = true;
-              currentRun = i;
-            }
-          } else if (started) {
-            // a null row closes the open run
-            started = false;
-            writeFields(vec, childrenWriters, offset + currentRun,
-                i - currentRun);
-          }
-        }
-        // flush a run that reaches the end of the range
-        if (started) {
-          writeFields(vec, childrenWriters, offset + currentRun,
-              length - currentRun);
-        }
-      }
-    }
-
-    /**
-     * Creates a row index entry for this column and then for every child.
-     */
-    @Override
-    public void createRowIndexEntry() throws IOException {
-      super.createRowIndexEntry();
-      for(TreeWriter child: childrenWriters) {
-        child.createRowIndexEntry();
-      }
-    }
-
-    /**
-     * Delegates to the base class, then to every child, and finally records
-     * the position again when rowIndexPosition is non-null.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      for(TreeWriter child: childrenWriters) {
-        child.writeStripe(builder, stats, requiredIndexEntries);
-      }
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Passes the stripe statistics to the base class and to every child.
-     */
-    @Override
-    public void updateFileStatistics(OrcProto.StripeStatistics stats) {
-      super.updateFileStatistics(stats);
-      for(TreeWriter child: childrenWriters) {
-        child.updateFileStatistics(stats);
-      }
-    }
-
-    /**
-     * Sums the base class estimate with the estimates of all children.
-     */
-    @Override
-    public long estimateMemory() {
-      long result = 0;
-      for(TreeWriter writer: childrenWriters) {
-        result += writer.estimateMemory();
-      }
-      return super.estimateMemory() + result;
-    }
-
-    /**
-     * Sums the raw data sizes reported by the children; the struct itself
-     * contributes nothing.
-     */
-    @Override
-    public long getRawDataSize() {
-      long result = 0;
-      for(TreeWriter writer: childrenWriters) {
-        result += writer.getRawDataSize();
-      }
-      return result;
-    }
-
-    /**
-     * Writes the file statistics of this column and then of every child.
-     */
-    @Override
-    public void writeFileStatistics(OrcProto.Footer.Builder footer) {
-      super.writeFileStatistics(footer);
-      for(TreeWriter child: childrenWriters) {
-        child.writeFileStatistics(footer);
-      }
-    }
-  }
-
-  /**
-   * Tree writer for list columns. The length of every non-null list goes to
-   * the LENGTH stream and the elements are forwarded to a single child
-   * writer.
-   */
-  private static class ListTreeWriter extends TreeWriterBase {
-    private final IntegerWriter lengths;
-    private final boolean isDirectV2;
-    private final TreeWriter childWriter;
-
-    /**
-     * Creates the child writer for the first child of the schema and the
-     * LENGTH integer writer and, when rowIndexPosition is non-null, records
-     * the initial stream position.
-     */
-    ListTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      childWriter =
-        createTreeWriter(schema.getChildren().get(0), writer, true);
-      lengths = createIntegerWriter(writer.createStream(columnId,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    /**
-     * Reports DIRECT_V2 when isDirectV2 is set and DIRECT otherwise.
-     */
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    /**
-     * Creates a row index entry for this column and then for the child.
-     */
-    @Override
-    public void createRowIndexEntry() throws IOException {
-      super.createRowIndexEntry();
-      childWriter.createRowIndexEntry();
-    }
-
-    /**
-     * Writes rows [offset, offset + length) of a ListColumnVector. Adjacent
-     * child ranges are merged so the child writer is called once per
-     * contiguous run of elements rather than once per list.
-     * NOTE(review): the non-repeating path tests only isNull and never
-     * noNulls - confirm isNull is reliable when noNulls is true.
-     */
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      ListColumnVector vec = (ListColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int childOffset = (int) vec.offsets[0];
-          int childLength = (int) vec.lengths[0];
-          // the same child slice is written again for every repeated row
-          for(int i=0; i < length; ++i) {
-            lengths.write(childLength);
-            childWriter.writeBatch(vec.child, childOffset, childLength);
-          }
-          // the bloom filters see the length once for the whole run
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(childLength);
-            }
-            bloomFilterUtf8.addLong(childLength);
-          }
-        }
-      } else {
-        // write the elements in runs
-        int currentOffset = 0;
-        int currentLength = 0;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            int nextLength = (int) vec.lengths[offset + i];
-            int nextOffset = (int) vec.offsets[offset + i];
-            lengths.write(nextLength);
-            if (currentLength == 0) {
-              // no pending elements: start a new run at this list
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else if (currentOffset + currentLength != nextOffset) {
-              // not contiguous with the pending run: flush it and restart
-              childWriter.writeBatch(vec.child, currentOffset,
-                  currentLength);
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else {
-              // contiguous: extend the pending run
-              currentLength += nextLength;
-            }
-            if (createBloomFilter) {
-              if (bloomFilter != null) {
-                bloomFilter.addLong(nextLength);
-              }
-              bloomFilterUtf8.addLong(nextLength);
-            }
+  /**
+   * Returns the physical writer's compression codec tuned for a stream kind,
+   * or null when the physical writer has no codec. BLOOM_FILTER, DATA,
+   * DICTIONARY_DATA and BLOOM_FILTER_UTF8 streams get the TEXT modifier with
+   * FAST when the compression strategy is SPEED and DEFAULT otherwise.
+   * LENGTH, DICTIONARY_COUNT, PRESENT, ROW_INDEX and SECONDARY streams get
+   * FASTEST with BINARY. Any other kind is logged and keeps the unmodified
+   * codec.
+   * @param kind the kind of stream the codec will compress
+   * @return the codec to use for the stream, possibly null
+   */
+  CompressionCodec getCustomizedCodec(OrcProto.Stream.Kind kind) {
+    // TODO: modify may create a new codec here. We want to end() it when the stream is closed,
+    //       but at this point there's no close() for the stream.
+    CompressionCodec result = physicalWriter.getCompressionCodec();
+    if (result != null) {
+      switch (kind) {
+        case BLOOM_FILTER:
+        case DATA:
+        case DICTIONARY_DATA:
+        case BLOOM_FILTER_UTF8:
+          if (compressionStrategy == OrcFile.CompressionStrategy.SPEED) {
+            result = result.modify(EnumSet.of(CompressionCodec.Modifier.FAST,
+                CompressionCodec.Modifier.TEXT));
+          } else {
+            result = result.modify(EnumSet.of(CompressionCodec.Modifier.DEFAULT,
+                CompressionCodec.Modifier.TEXT));
           }
-        }
-        // flush whatever run is still pending after the last row
-        if (currentLength != 0) {
-          childWriter.writeBatch(vec.child, currentOffset,
-              currentLength);
-        }
-      }
-    }
-
-    /**
-     * Delegates to the base class, flushes the lengths writer, writes the
-     * child's stripe and then records the position again when
-     * rowIndexPosition is non-null.
-     */
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      lengths.flush();
-      childWriter.writeStripe(builder, stats, requiredIndexEntries);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
+          break;
+        case LENGTH:
+        case DICTIONARY_COUNT:
+        case PRESENT:
+        case ROW_INDEX:
+        case SECONDARY:
+          // easily compressed using the fastest modes
+          result = result.modify(EnumSet.of(CompressionCodec.Modifier.FASTEST,
+              CompressionCodec.Modifier.BINARY));
+          break;
+        default:
+          LOG.info("Missing ORC compression modifiers for " + kind);
+          break;
       }
     }
-
-    /**
-     * Records the base class position followed by the lengths writer
-     * position.
-     */
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      lengths.getPosition(recorder);
-    }
-
-    /**
-     * Passes the stripe statistics to the base class and to the child.
-     */
-    @Override
-    public void updateFileStatistics(OrcProto.StripeStatistics stats) {
-      super.updateFileStatistics(stats);
-      childWriter.updateFileStatistics(stats);
-    }
-
-    /**
-     * Sums the base class estimate with the estimates of the lengths writer
-     * and the child writer.
-     */
-    @Override
-    public long estimateMemory() {
-      return super.estimateMemory() + lengths.estimateMemory() +
-          childWriter.estimateMemory();
-    }
-
-    /**
-     * Returns the raw data size reported by the child writer.
-     */
-    @Override
-    public long getRawDataSize() {
-      return childWriter.getRawDataSize();
-    }
-
-    /**
-     * Writes the file statistics of this column and then of the child.
-     */
-    @Override
-    public void writeFileStatistics(OrcProto.Footer.Builder footer) {
-      super.writeFileStatistics(footer);
-      childWriter.writeFileStatistics(footer);
-    }
+    return result;
   }
 
-  private static class MapTreeWriter extends TreeWriterBase {
-    private final IntegerWriter lengths;
-    private final boolean isDirectV2;
-    private final TreeWriter keyWriter;
-    private final TreeWriter valueWriter;
-
-    MapTreeWriter(int columnId,
-                  TypeDescription schema,
-                  StreamFactory writer,
-                  boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      List<TypeDescription> children = schema.getChildren();
-      keyWriter = createTreeWriter(children.get(0), writer, true);
-      valueWriter = createTreeWriter(children.get(1), writer, true);
-      lengths = createIntegerWriter(writer.createStream(columnId,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    OrcProto.ColumnEncoding.Builder getEncoding() {
-      OrcProto.ColumnEncoding.Builder result = super.getEncoding();
-      if (isDirectV2) {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2);
-      } else {
-        result.setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
-      }
-      return result;
-    }
-
-    @Override
-    public void createRowIndexEntry() throws IOException {
-      super.createRowIndexEntry();
-      keyWriter.createRowIndexEntry();
-      valueWriter.createRowIndexEntry();
-    }
-
-    @Override
-    public void writeBatch(ColumnVector vector, int offset,
-                           int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      MapColumnVector vec = (MapColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int childOffset = (int) vec.offsets[0];
-          int childLength = (int) vec.lengths[0];
-          for(int i=0; i < length; ++i) {
-            lengths.write(childLength);
-            keyWriter.writeBatch(vec.keys, childOffset, childLength);
-            valueWriter.writeBatch(vec.values, childOffset, childLength);
-          }
-          if (createBloomFilter) {
-            if (bloomFilter != null) {
-              bloomFilter.addLong(childLength);
-            }
-            bloomFilterUtf8.addLong(childLength);
-          }
-        }
-      } else {
-        // write the elements in runs
-        int currentOffset = 0;
-        int currentLength = 0;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            int nextLength = (int) vec.lengths[offset + i];
-            int nextOffset = (int) vec.offsets[offset + i];
-            lengths.write(nextLength);
-            if (currentLength == 0) {
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else if (currentOffset + currentLength != nextOffset) {
-              keyWriter.writeBatch(vec.keys, currentOffset,
-                  currentLength);
-              valueWriter.writeBatch(vec.values, currentOffset,
-                  currentLength);
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else {
-              currentLength += nextLength;
-            }
-            if (createBloomFilter) {
-              if (bloomFilter != null) {
-                bloomFilter.addLong(nextLength);
-              }
-              bloomFilterUtf8.addLong(nextLength);
-            }
-          }
-        }
-        if (currentLength != 0) {
-          keyWriter.writeBatch(vec.keys, currentOffset,
-              currentLength);
-          valueWriter.writeBatch(vec.values, currentOffset,
-              currentLength);
-        }
-      }
-    }
-
-    @Override
-    public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                            OrcProto.StripeStatistics.Builder stats,
-                            int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, stats, requiredIndexEntries);
-      lengths.flush();
-      keyWriter.writeStripe(builder, stats, requiredIndexEntries);
-      valueWriter.writeStripe(builder, stats, requiredIndexEntries);
-      if (rowIndexPosition != null) {
-        recordPosition(rowIndexPosition);
-      }
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      lengths.getPosition(recorder);
-    }
-
-    @Override
-    public void updateFileStatistics(OrcProto.StripeStatistics stats) {
-      super.updateFileStatistics(stats);
-      keyWriter.updateFileStatistics(stats);
-      valueWriter.updateFileStatistics(stats);
-    }
+  /**
+   * Interface from the Writer to the TreeWriters. This limits the visibility
+   * that the TreeWriters have into the Writer.
+   */
+  private class StreamFactory implements WriterContext {
+    /**
+     * Create a stream to store part of a column.
+     * @param column the column id for the stream
+     * @param kind the kind of stream
+     * @return The output outStream that the section needs to be written to.
+     */
+    public OutStream createStream(int column,
+                                  OrcProto.Stream.Kind kind
+                                  ) throws IOException {
+      final StreamName name = new StreamName(column, kind);
+      CompressionCodec codec = getCustomizedCodec(kind);
 
-    @Ov

<TRUNCATED>