Posted to commits@drill.apache.org by ar...@apache.org on 2019/01/30 13:16:30 UTC

[drill] 02/02: DRILL-7006: Add type conversion to row writers

This is an automated email from the ASF dual-hosted git repository.

arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 8fb85cd4370e6143641cda1ad5b998caca0b6bf7
Author: Paul Rogers <pr...@cloudera.com>
AuthorDate: Sun Jan 27 10:32:46 2019 -0800

    DRILL-7006: Add type conversion to row writers
    
    Modifies the column metadata and writer abstractions to allow a type conversion "shim" to be specified as part of the schema, then inserted as part of the row set writer. This allows, say, setting an Int or Date column from a string: the shim parses the string to obtain the proper data type to store in the vector.
    
    Type conversion not yet supported in the result set loader: some additional complexity needs to be resolved.
    
    Adds unit tests for this functionality. Refactors some existing tests to remove rough edges.
    
    closes #1623
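    
    For reference, a minimal sketch of the intended usage, adapted from
    the unit tests added in this commit (TestConvertor is the test-only
    string-to-int shim defined below; the column name is illustrative):
    
        TupleMetadata schema = new SchemaBuilder()
            .add("n1", MinorType.INT)
            .buildSchema();
    
        // Attach a converter factory; a new shim writer is created for
        // each row set writer built from this schema.
        schema.metadata("n1").setTypeConverter(TestConvertor.factory());
    
        // The writer now accepts a string for the INT column; the shim
        // parses the string and stores the resulting int in the vector.
        RowSet rows = new RowSetBuilder(fixture.allocator(), schema)
            .addRow("123")
            .build();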
---
 .../record/metadata/AbstractColumnMetadata.java    |  16 ++
 .../record/metadata/PrimitiveColumnMetadata.java   |  56 ++++++-
 .../rowSet/impl/TestResultSetLoaderProtocol.java   |  54 ++++++
 .../test/rowSet/test/TestColumnConvertor.java      | 145 ++++++++++++++++
 .../test/{RowSetTest.java => TestRowSet.java}      |  45 +++--
 .../rowSet/{ => test}/TestRowSetComparison.java    |   5 +-
 .../drill/exec/record/metadata/ColumnMetadata.java |  40 +++++
 .../vector/accessor/ColumnConversionFactory.java   |  40 +++++
 .../accessor/UnsupportedConversionError.java       |   2 +-
 .../accessor/writer/AbstractScalarWriter.java      |  52 ++----
 .../accessor/writer/AbstractWriteConvertor.java    | 186 +++++++++++++++++++++
 .../vector/accessor/writer/ConcreteWriter.java     |  69 ++++++++
 .../vector/accessor/writer/ScalarArrayWriter.java  |  24 +--
 13 files changed, 654 insertions(+), 80 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
index 595c5c4..63dda07 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
@@ -21,6 +21,8 @@ import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
+import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
 
 /**
  * Abstract definition of column metadata. Allows applications to create
@@ -179,6 +181,20 @@ public abstract class AbstractColumnMetadata implements ColumnMetadata {
   public boolean isProjected() { return projected; }
 
   @Override
+  public void setDefaultValue(Object value) { }
+
+  @Override
+  public Object defaultValue() { return null; }
+
+  @Override
+  public void setTypeConverter(ColumnConversionFactory factory) {
+    throw new UnsupportedConversionError("Type conversion not supported for non-scalar writers");
+  }
+
+  @Override
+  public ColumnConversionFactory typeConverter() { return null; }
+
+  @Override
   public String toString() {
     final StringBuilder buf = new StringBuilder()
         .append("[")
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
index dfbd4a9..ead6134 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
@@ -23,16 +23,52 @@ import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.expr.TypeHelper;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
 
 /**
- * Primitive (non-map) column. Describes non-nullable, nullable and
- * array types (which differ only in mode, but not in metadata structure.)
+ * Primitive (non-map) column. Describes non-nullable, nullable and array types
+ * (which differ only in mode, not in metadata structure).
+ * <p>
+ * Metadata is of two types:
+ * <ul>
+ * <li>Storage metadata that describes how the column is materialized in a
+ * vector. Storage metadata is immutable because revising an existing vector is
+ * a complex operation.</li>
+ * <li>Supplemental metadata used when reading or writing the column.
+ * Supplemental metadata can be changed after the column is created, though it
+ * should generally be set before invoking code that uses the metadata.</li>
+ * </ul>
  */
 
 public class PrimitiveColumnMetadata extends AbstractColumnMetadata {
 
+  /**
+   * Expected (average) width for variable-width columns.
+   */
+
   private int expectedWidth;
 
+  /**
+   * Default value to use for filling a vector when no real data is
+   * available, such as for columns added in new files but which do not
+   * exist in existing files. The ultimate default value is the SQL null
+   * value, which works only for nullable columns.
+   */
+
+  private Object defaultValue;
+
+  /**
+   * Factory for an optional shim writer that translates between the
+   * type of data available to the code that creates the vectors on the
+   * one hand, and the actual type of the column on the other. For
+   * example, a shim might parse a string form of a date into the form
+   * stored in vectors.
+   * <p>
+   * The default is to use the "natural" type: that is, to insert no
+   * conversion shim.
+   */
+
+  private ColumnConversionFactory shimFactory;
+
   public PrimitiveColumnMetadata(MaterializedField schema) {
     super(schema);
     expectedWidth = estimateWidth(schema.getType());
@@ -99,6 +135,22 @@ public class PrimitiveColumnMetadata extends AbstractColumnMetadata {
   }
 
   @Override
+  public void setDefaultValue(Object value) {
+    defaultValue = value;
+  }
+
+  @Override
+  public Object defaultValue() { return defaultValue; }
+
+  @Override
+  public void setTypeConverter(ColumnConversionFactory factory) {
+    shimFactory = factory;
+  }
+
+  @Override
+  public ColumnConversionFactory typeConverter() { return shimFactory; }
+
+  @Override
   public ColumnMetadata cloneEmpty() {
     return new PrimitiveColumnMetadata(this);
   }
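
A sketch of the storage vs. supplemental metadata split described in
the class javadoc above: storage metadata (type, mode, width) is fixed
when the column is created, while the two new supplemental properties
may be set afterward (column name and values are illustrative;
TestConvertor is the test shim added in this commit):

    TupleMetadata schema = new SchemaBuilder()
        .addNullable("price", MinorType.INT)
        .buildSchema();

    ColumnMetadata col = schema.metadata("price");
    col.setDefaultValue(0);                        // fill value when no data exists
    col.setTypeConverter(TestConvertor.factory()); // optional conversion shim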
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
index 088e8f4..8fb600d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.physical.rowSet.impl;
 
+import static org.apache.drill.test.rowSet.RowSetUtilities.intArray;
 import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -43,8 +44,10 @@ import org.apache.drill.exec.vector.accessor.TupleWriter.UndefinedColumnExceptio
 import org.apache.drill.test.SubOperatorTest;
 import org.apache.drill.test.rowSet.RowSet;
 import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.test.TestColumnConvertor.TestConvertor;
 import org.apache.drill.test.rowSet.RowSetReader;
 import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -602,4 +605,55 @@ public class TestResultSetLoaderProtocol extends SubOperatorTest {
 
     rsLoader.close();
   }
+
+  /**
+   * Test the use of a column type converter in the result set loader for
+   * required, nullable and repeated columns.
+   */
+
+  @Ignore("Not yet")
+  @Test
+  public void testTypeConversion() {
+    TupleMetadata schema = new SchemaBuilder()
+        .add("n1", MinorType.INT)
+        .addNullable("n2", MinorType.INT)
+        .addArray("n3", MinorType.INT)
+        .buildSchema();
+
+    // Add a type convertor. Passed in as a factory
+    // since we must create a new one for each row set writer.
+
+    schema.metadata("n1").setTypeConverter(TestConvertor.factory());
+    schema.metadata("n2").setTypeConverter(TestConvertor.factory());
+    schema.metadata("n3").setTypeConverter(TestConvertor.factory());
+
+    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+        .setSchema(schema)
+        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+        .build();
+    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+
+    // Write data both as a string and as an integer
+
+    RowSetLoader rootWriter = rsLoader.writer();
+    rootWriter.addRow("123", "12", strArray("123", "124"));
+    rootWriter.addRow(234, 23, intArray(234, 235));
+    RowSet actual = fixture.wrap(rsLoader.harvest());
+
+    // Build the expected vector without a type convertor.
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+        .add("n1", MinorType.INT)
+        .addNullable("n2", MinorType.INT)
+        .addArray("n3", MinorType.INT)
+        .buildSchema();
+    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+        .addRow(123, 12, intArray(123, 124))
+        .addRow(234, 23, intArray(234, 235))
+        .build();
+
+    // Compare
+
+    RowSetUtilities.verify(expected, actual);
+  }
 }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java
new file mode 100644
index 0000000..b7d865d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
+import static org.apache.drill.test.rowSet.RowSetUtilities.intArray;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractWriteConvertor;
+import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.junit.Test;
+
+/**
+ * Tests the column type convertor feature of the column metadata
+ * and of the RowSetWriter.
+ */
+
+public class TestColumnConvertor extends SubOperatorTest {
+
+  /**
+   * Simple type converter that allows string-to-int conversions.
+   * Inherits the usual int-value support from the base writer.
+   */
+  public static class TestConvertor extends AbstractWriteConvertor {
+
+    public TestConvertor(ScalarWriter baseWriter) {
+      super(baseWriter);
+    }
+
+    @Override
+    public void setString(String value) {
+      setInt(Integer.parseInt(value));
+    }
+
+    public static ColumnConversionFactory factory() {
+      return new ColumnConversionFactory() {
+        @Override
+        public ConcreteWriter newWriter(ColumnMetadata colDefn,
+            ConcreteWriter baseWriter) {
+           return new TestConvertor(baseWriter);
+        }
+      };
+    }
+  }
+
+  @Test
+  public void testScalarConvertor() {
+
+    // Create the schema
+
+    TupleMetadata schema = new SchemaBuilder()
+        .add("n1", MinorType.INT)
+        .addNullable("n2", MinorType.INT)
+        .buildSchema();
+
+    // Add a type convertor. Passed in as a factory
+    // since we must create a new one for each row set writer.
+
+    schema.metadata("n1").setTypeConverter(TestConvertor.factory());
+    schema.metadata("n2").setTypeConverter(TestConvertor.factory());
+
+    // Write data both as a string and as an integer
+
+    RowSet actual = new RowSetBuilder(fixture.allocator(), schema)
+        .addRow("123", "12")
+        .addRow(234, 23)
+        .build();
+
+    // Build the expected vector without a type convertor.
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+        .add("n1", MinorType.INT)
+        .addNullable("n2", MinorType.INT)
+        .buildSchema();
+    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+        .addRow(123, 12)
+        .addRow(234, 23)
+        .build();
+
+    // Compare
+
+    RowSetUtilities.verify(expected, actual);
+  }
+
+  @Test
+  public void testArrayConvertor() {
+
+    // Create the schema
+
+    TupleMetadata schema = new SchemaBuilder()
+        .addArray("n", MinorType.INT)
+        .buildSchema();
+
+    // Add a type convertor. Passed in as a factory
+    // since we must create a new one for each row set writer.
+
+    schema.metadata("n").setTypeConverter(TestConvertor.factory());
+
+    // Write data both as a string and as an integer
+
+    RowSet actual = new RowSetBuilder(fixture.allocator(), schema)
+        .addSingleCol(strArray("123", "124"))
+        .addSingleCol(intArray(234, 235))
+        .build();
+
+    // Build the expected vector without a type convertor.
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+        .addArray("n", MinorType.INT)
+        .buildSchema();
+    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+        .addSingleCol(intArray(123, 124))
+        .addSingleCol(intArray(234, 235))
+        .build();
+
+    // Compare
+
+    RowSetUtilities.verify(expected, actual);
+  }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java
similarity index 96%
rename from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
rename to exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java
index ee5c599..b660672 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java
@@ -29,7 +29,6 @@ import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 
 import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.BatchSchema;
 import org.apache.drill.exec.record.metadata.SchemaBuilder;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.vector.ValueVector;
@@ -47,8 +46,8 @@ import org.apache.drill.test.SubOperatorTest;
 import org.apache.drill.test.rowSet.DirectRowSet;
 import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
 import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
 import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
 import org.apache.drill.test.rowSet.RowSetWriter;
 import org.junit.Test;
 
@@ -72,8 +71,8 @@ import org.junit.Test;
  * A list is an array of variants. Variants are tested elsewhere.
  */
 
-public class RowSetTest extends SubOperatorTest {
-  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RowSetTest.class);
+public class TestRowSet extends SubOperatorTest {
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestRowSet.class);
 
   /**
    * Test the simplest constructs: a row with top-level scalar
@@ -169,7 +168,7 @@ public class RowSetTest extends SubOperatorTest {
         .addRow(30)
         .addRow(40)
         .build();
-    new RowSetComparison(expected).verifyAndClearAll(actual);
+    RowSetUtilities.verify(expected, actual);
   }
 
   /**
@@ -303,8 +302,7 @@ public class RowSetTest extends SubOperatorTest {
         .addSingleCol(intArray(30))
         .addSingleCol(intArray(40, 41))
         .build();
-    new RowSetComparison(expected)
-      .verifyAndClearAll(actual);
+    RowSetUtilities.verify(expected, actual);
   }
 
   /**
@@ -438,8 +436,7 @@ public class RowSetTest extends SubOperatorTest {
         .addRow(20, objArray(intArray(21, 22)))
         .addRow(30, objArray(intArray(31, 32)))
         .build();
-    new RowSetComparison(expected)
-      .verifyAndClearAll(actual);
+    RowSetUtilities.verify(expected, actual);
   }
 
   @Test
@@ -583,8 +580,7 @@ public class RowSetTest extends SubOperatorTest {
         .addRow(20, objArray(objArray(201, 202), objArray(211, 212)))
         .addRow(30, objArray(objArray(301, 302), objArray(311, 312)))
         .build();
-    new RowSetComparison(expected)
-      .verifyAndClearAll(actual);
+    RowSetUtilities.verify(expected, actual);
   }
 
   /**
@@ -594,12 +590,12 @@ public class RowSetTest extends SubOperatorTest {
 
   @Test
   public void testTopFixedWidthArray() {
-    final BatchSchema batchSchema = new SchemaBuilder()
+    final TupleMetadata schema = new SchemaBuilder()
         .add("c", MinorType.INT)
         .addArray("a", MinorType.INT)
-        .build();
+        .buildSchema();
 
-    final ExtendableRowSet rs1 = fixture.rowSet(batchSchema);
+    final ExtendableRowSet rs1 = fixture.rowSet(schema);
     final RowSetWriter writer = rs1.writer();
     writer.scalar(0).setInt(10);
     final ScalarWriter array = writer.array(1).scalar();
@@ -644,14 +640,13 @@ public class RowSetTest extends SubOperatorTest {
     assertEquals(0, arrayReader.size());
     assertFalse(reader.next());
 
-    final SingleRowSet rs2 = fixture.rowSetBuilder(batchSchema)
+    final SingleRowSet rs2 = fixture.rowSetBuilder(schema)
       .addRow(10, intArray(100, 110))
       .addRow(20, intArray(200, 120, 220))
       .addRow(30, null)
       .build();
 
-    new RowSetComparison(rs1)
-      .verifyAndClearAll(rs2);
+    RowSetUtilities.verify(rs1, rs2);
   }
   /**
    * Test filling a row set up to the maximum number of rows.
@@ -661,11 +656,11 @@ public class RowSetTest extends SubOperatorTest {
 
   @Test
   public void testRowBounds() {
-    final BatchSchema batchSchema = new SchemaBuilder()
+    final TupleMetadata schema = new SchemaBuilder()
         .add("a", MinorType.INT)
-        .build();
+        .buildSchema();
 
-    final ExtendableRowSet rs = fixture.rowSet(batchSchema);
+    final ExtendableRowSet rs = fixture.rowSet(schema);
     final RowSetWriter writer = rs.writer();
     int count = 0;
     while (! writer.isFull()) {
@@ -695,10 +690,10 @@ public class RowSetTest extends SubOperatorTest {
 
   @Test
   public void testBufferBounds() {
-    final BatchSchema batchSchema = new SchemaBuilder()
+    final TupleMetadata schema = new SchemaBuilder()
         .add("a", MinorType.INT)
         .add("b", MinorType.VARCHAR)
-        .build();
+        .buildSchema();
 
     String varCharValue;
     try {
@@ -709,7 +704,7 @@ public class RowSetTest extends SubOperatorTest {
       throw new IllegalStateException(e);
     }
 
-    final ExtendableRowSet rs = fixture.rowSet(batchSchema);
+    final ExtendableRowSet rs = fixture.rowSet(schema);
     final RowSetWriter writer = rs.writer();
     int count = 0;
     try {
@@ -751,14 +746,14 @@ public class RowSetTest extends SubOperatorTest {
     // will be provided by a reader, by an incoming batch,
     // etc.
 
-    final BatchSchema schema = new SchemaBuilder()
+    final TupleMetadata schema = new SchemaBuilder()
         .add("a", MinorType.VARCHAR)
         .addArray("b", MinorType.INT)
         .addMap("c")
           .add("c1", MinorType.INT)
           .add("c2", MinorType.VARCHAR)
           .resumeSchema()
-        .build();
+        .buildSchema();
 
     // Step 2: Create a batch. Done here because this is
     // a batch-oriented test. Done automatically in the
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java
similarity index 96%
rename from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java
rename to exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java
index 062dd56..867f61f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java
@@ -15,13 +15,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.drill.test.rowSet;
+package org.apache.drill.test.rowSet.test;
 
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.exec.memory.BufferAllocator;
 import org.apache.drill.exec.memory.RootAllocator;
 import org.apache.drill.exec.record.metadata.SchemaBuilder;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetComparison;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
index 0e0fb49..85d7d25 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
@@ -21,6 +21,7 @@ import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
 
 /**
  * Metadata description of a column including names, types and structure
@@ -182,6 +183,45 @@ public interface ColumnMetadata {
   int expectedElementCount();
 
   /**
+   * Set the default value to use for filling a vector when no real data is
+   * available, such as for columns added in new files but which do not
+   * exist in existing files. The "default default" is null, which works
+   * only for nullable columns.
+   *
+   * @param value column value, represented as a Java object, acceptable
+   * to the {@link ColumnWriter#setObject()} method for this column's writer.
+   */
+  void setDefaultValue(Object value);
+
+  /**
+   * Returns the default value for this column.
+   *
+   * @return the default value, or null if no default value has been set
+   */
+  Object defaultValue();
+
+  /**
+   * Set the factory for an optional shim writer that translates between
+   * the type of data available to the code that creates the vectors on
+   * the one hand, and the actual type of the column on the other. For
+   * example, a shim might parse a string form of a date into the form
+   * stored in vectors.
+   * <p>
+   * The shim must write to the base vector for this column using one of
+   * the supported base writer "set" methods.
+   * <p>
+   * The default is to use the "natural" type: that is, to insert no
+   * conversion shim.
+   */
+  void setTypeConverter(ColumnConversionFactory factory);
+
+  /**
+   * Returns the type conversion shim for this column.
+   *
+   * @return the type conversion factory, or null if none is set
+   */
+  ColumnConversionFactory typeConverter();
+
+  /**
    * Create an empty version of this column. If the column is a scalar,
    * produces a simple copy. If a map, produces a clone without child
    * columns.
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java
new file mode 100644
index 0000000..02efd6d
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter;
+
+/**
+ * Create a column type converter for the given column and base writer.
+ * The new writer is expected to be a "shim" writer that implements
+ * additional "set" methods to convert data from the type that the
+ * client requires to the type required by the underlying vector as
+ * represented by the base writer.
+ */
+public interface ColumnConversionFactory {
+  /**
+   * Create a type conversion writer for the given column, converting data
+   * to the type needed by the base writer.
+   * @param colDefn column metadata definition
+   * @param baseWriter base column writer for the column's vector
+   * @return a new scalar writer to insert between the client and
+   * the base vector
+   */
+  ConcreteWriter newWriter(ColumnMetadata colDefn, ConcreteWriter baseWriter);
+}
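
Since ColumnConversionFactory declares a single method, a converter can
also be supplied as a lambda (a sketch; StringToIntConvertor stands in
for any shim extending AbstractWriteConvertor, described below):

    ColumnConversionFactory factory =
        (colDefn, baseWriter) -> new StringToIntConvertor(baseWriter);
    schema.metadata("n1").setTypeConverter(factory);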
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
index dee2612..68ed5e0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
@@ -28,7 +28,7 @@ public class UnsupportedConversionError extends UnsupportedOperationException {
 
   private static final long serialVersionUID = 1L;
 
-  private UnsupportedConversionError(String message) {
+  public UnsupportedConversionError(String message) {
     super(message);
   }
 
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
index 08e4ac3..9c2e986 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
@@ -17,16 +17,13 @@
  */
 package org.apache.drill.exec.vector.accessor.writer;
 
-import java.math.BigDecimal;
-
 import org.apache.drill.exec.record.metadata.ColumnMetadata;
 import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
 import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
 import org.apache.drill.exec.vector.accessor.ObjectType;
 import org.apache.drill.exec.vector.accessor.ScalarWriter;
-import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
 import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
-import org.joda.time.Period;
 
 /**
  * Column writer implementation that acts as the basis for the
@@ -35,14 +32,20 @@ import org.joda.time.Period;
  * method(s).
  */
 
-public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents {
+public abstract class AbstractScalarWriter extends ConcreteWriter {
 
   public static class ScalarObjectWriter extends AbstractObjectWriter {
 
-    private AbstractScalarWriter scalarWriter;
+    private ConcreteWriter scalarWriter;
 
-    public ScalarObjectWriter(AbstractScalarWriter scalarWriter) {
-      this.scalarWriter = scalarWriter;
+    public ScalarObjectWriter(ConcreteWriter scalarWriter) {
+      final ColumnMetadata metadata = scalarWriter.schema();
+      final ColumnConversionFactory factory = metadata.typeConverter();
+      if (factory == null) {
+        this.scalarWriter = scalarWriter;
+      } else {
+        this.scalarWriter = factory.newWriter(metadata, scalarWriter);
+      }
     }
 
     @Override
@@ -111,40 +114,7 @@ public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents
   @Override
   public void saveRow() { }
 
-  protected UnsupportedConversionError conversionError(String javaType) {
-    return UnsupportedConversionError.writeError(schema(), javaType);
-  }
-
   @Override
-  public void setObject(Object value) {
-    if (value == null) {
-      setNull();
-    } else if (value instanceof Integer) {
-      setInt((Integer) value);
-    } else if (value instanceof Long) {
-      setLong((Long) value);
-    } else if (value instanceof String) {
-      setString((String) value);
-    } else if (value instanceof BigDecimal) {
-      setDecimal((BigDecimal) value);
-    } else if (value instanceof Period) {
-      setPeriod((Period) value);
-    } else if (value instanceof byte[]) {
-      byte[] bytes = (byte[]) value;
-      setBytes(bytes, bytes.length);
-    } else if (value instanceof Byte) {
-      setInt((Byte) value);
-    } else if (value instanceof Short) {
-      setInt((Short) value);
-    } else if (value instanceof Double) {
-      setDouble((Double) value);
-    } else if (value instanceof Float) {
-      setDouble((Float) value);
-    } else {
-      throw conversionError(value.getClass().getSimpleName());
-    }
-  }
-
   public void dump(HierarchicalFormatter format) {
     format
       .startObject(this)
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java
new file mode 100644
index 0000000..0d0bc88
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+/**
+ * Base class for type converting scalar column writers. All methods
+ * pass through to the base writer. Override selected "set" methods to
+ * perform the type conversion, such as overriding "setString" to convert
+ * from a string representation of a value to the actual format.
+ * <p>
+ * The {@link #setObject()} method also works through the shim: the
+ * object is dispatched to this class's "set" methods, allowing, say,
+ * setting a string object for an int column as in the example above.
+ */
+
+// TODO: This organization works fine, but is a bit heavy-weight.
+// It may be time to think about separating the pure writer aspect of
+// a column writer from its plumbing aspects. That is, the base
+// ConcreteWriter class combines the public API (ScalarWriter) with
+// the internal implementation (WriterEvents) into a single class.
+// Might be worth using composition rather than inheritance to keep
+// these aspects distinct.
+
+public class AbstractWriteConvertor extends ConcreteWriter {
+
+  private final ConcreteWriter baseWriter;
+
+  public AbstractWriteConvertor(ScalarWriter baseWriter) {
+    this.baseWriter = (ConcreteWriter) baseWriter;
+  }
+
+  @Override
+  public ValueType valueType() {
+    return baseWriter.valueType();
+  }
+
+  @Override
+  public int lastWriteIndex() {
+    return baseWriter.lastWriteIndex();
+  }
+
+  @Override
+  public void restartRow() {
+    baseWriter.restartRow();
+  }
+
+  @Override
+  public void endWrite() {
+    baseWriter.endWrite();
+  }
+
+  @Override
+  public void preRollover() {
+    baseWriter.preRollover();
+  }
+
+  @Override
+  public void postRollover() {
+    baseWriter.postRollover();
+  }
+
+  @Override
+  public ObjectType type() {
+    return baseWriter.type();
+  }
+
+  @Override
+  public boolean nullable() {
+    return baseWriter.nullable();
+  }
+
+  @Override
+  public ColumnMetadata schema() {
+    return baseWriter.schema();
+  }
+
+  @Override
+  public void setNull() {
+    baseWriter.setNull();
+  }
+
+  @Override
+  public int rowStartIndex() {
+    return baseWriter.rowStartIndex();
+  }
+
+  @Override
+  public int writeIndex() {
+    return baseWriter.writeIndex();
+  }
+
+  @Override
+  public void bindListener(ColumnWriterListener listener) {
+    baseWriter.bindListener(listener);
+  }
+
+  @Override
+  public void bindIndex(ColumnWriterIndex index) {
+    baseWriter.bindIndex(index);
+  }
+
+  @Override
+  public void startWrite() {
+    baseWriter.startWrite();
+  }
+
+  @Override
+  public void startRow() {
+    baseWriter.startRow();
+  }
+
+  @Override
+  public void endArrayValue() {
+    baseWriter.endArrayValue();
+  }
+
+  @Override
+  public void saveRow() {
+    baseWriter.saveRow();
+  }
+
+  @Override
+  public void setInt(int value) {
+    baseWriter.setInt(value);
+  }
+
+  @Override
+  public void setLong(long value) {
+    baseWriter.setLong(value);
+  }
+
+  @Override
+  public void setDouble(double value) {
+    baseWriter.setDouble(value);
+  }
+
+  @Override
+  public void setString(String value) {
+    baseWriter.setString(value);
+  }
+
+  @Override
+  public void setBytes(byte[] value, int len) {
+    baseWriter.setBytes(value, len);
+  }
+
+  @Override
+  public void setDecimal(BigDecimal value) {
+    baseWriter.setDecimal(value);
+  }
+
+  @Override
+  public void setPeriod(Period value) {
+    baseWriter.setPeriod(value);
+  }
+
+  @Override
+  public void dump(HierarchicalFormatter format) {
+    baseWriter.dump(format);
+  }
+}
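
A concrete shim overrides only the "set" methods it needs; all others
pass through to the base writer. A sketch of the date case mentioned in
the class javadoc, assuming the column's base writer accepts the value
through setLong() as epoch milliseconds (the actual stored form depends
on the column's vector type; IsoDateConvertor is hypothetical):

    public class IsoDateConvertor extends AbstractWriteConvertor {

      public IsoDateConvertor(ScalarWriter baseWriter) {
        super(baseWriter);
      }

      @Override
      public void setString(String value) {
        // Parse an ISO-8601 date, store as milliseconds at UTC midnight.
        final long millis = java.time.LocalDate.parse(value)
            .atStartOfDay(java.time.ZoneOffset.UTC)
            .toInstant().toEpochMilli();
        setLong(millis);
      }
    }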
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java
new file mode 100644
index 0000000..549431f
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+/**
+ * Base class for concrete scalar column writers including actual vector
+ * writers, wrappers for nullable types, and shims used to convert types.
+ */
+
+public abstract class ConcreteWriter implements ScalarWriter, WriterEvents {
+
+  @Override
+  public void setObject(Object value) {
+    if (value == null) {
+      setNull();
+    } else if (value instanceof Integer) {
+      setInt((Integer) value);
+    } else if (value instanceof Long) {
+      setLong((Long) value);
+    } else if (value instanceof String) {
+      setString((String) value);
+    } else if (value instanceof BigDecimal) {
+      setDecimal((BigDecimal) value);
+    } else if (value instanceof Period) {
+      setPeriod((Period) value);
+    } else if (value instanceof byte[]) {
+      final byte[] bytes = (byte[]) value;
+      setBytes(bytes, bytes.length);
+    } else if (value instanceof Byte) {
+      setInt((Byte) value);
+    } else if (value instanceof Short) {
+      setInt((Short) value);
+    } else if (value instanceof Double) {
+      setDouble((Double) value);
+    } else if (value instanceof Float) {
+      setDouble((Float) value);
+    } else {
+      throw conversionError(value.getClass().getSimpleName());
+    }
+  }
+
+  protected UnsupportedConversionError conversionError(String javaType) {
+    return UnsupportedConversionError.writeError(schema(), javaType);
+  }
+
+  abstract void dump(HierarchicalFormatter format);
+}
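
Note that because setObject() dispatches to the type-specific "set"
methods on "this", a shim that overrides, say, setString() is reached
even when the client writes through setObject() (usage sketch with the
string-to-int test converter from this commit):

    // INT column with a string-to-int shim installed:
    writer.setObject("123");  // dispatches to setString("123"), which
                              // the shim converts to setInt(123)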
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
index 2ac7d45..f271bfa 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
@@ -60,13 +60,17 @@ public class ScalarArrayWriter extends BaseArrayWriter {
     public final void nextElement() { next(); }
   }
 
-  private final BaseScalarWriter elementWriter;
+  private final ConcreteWriter elementWriter;
 
   public ScalarArrayWriter(ColumnMetadata schema,
       RepeatedValueVector vector, BaseScalarWriter elementWriter) {
     super(schema, vector.getOffsetVector(),
         new ScalarObjectWriter(elementWriter));
-    this.elementWriter = elementWriter;
+
+    // Save the writer from the scalar object writer created above,
+    // which may have wrapped the element writer in a type convertor.
+
+    this.elementWriter = (ConcreteWriter) elementObjWriter.scalar();
   }
 
   public static ArrayObjectWriter build(ColumnMetadata schema,
@@ -110,7 +114,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
 
       return;
     }
-    String objClass = array.getClass().getName();
+    final String objClass = array.getClass().getName();
     if (! objClass.startsWith("[")) {
       throw new IllegalArgumentException(
           String.format("Argument must be an array. Column `%s`, value = %s",
@@ -119,12 +123,12 @@ public class ScalarArrayWriter extends BaseArrayWriter {
 
     // Figure out type
 
-    char second = objClass.charAt(1);
+    final char second = objClass.charAt(1);
     switch ( second ) {
     case  '[':
       // bytes is represented as an array of byte arrays.
 
-      char third = objClass.charAt(2);
+      final char third = objClass.charAt(2);
       switch (third) {
       case 'B':
         setBytesArray((byte[][]) array);
@@ -157,11 +161,11 @@ public class ScalarArrayWriter extends BaseArrayWriter {
       setBooleanArray((boolean[]) array);
       break;
     case 'L':
-      int posn = objClass.indexOf(';');
+      final int posn = objClass.indexOf(';');
 
       // If the array is of type Object, then we have no type info.
 
-      String memberClassName = objClass.substring(2, posn);
+      final String memberClassName = objClass.substring(2, posn);
       if (memberClassName.equals(String.class.getName())) {
         setStringArray((String[]) array);
       } else if (memberClassName.equals(Period.class.getName())) {
@@ -215,7 +219,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
 
   public void setIntObjectArray(Integer[] value) {
     for (int i = 0; i < value.length; i++) {
-      Integer element = value[i];
+      final Integer element = value[i];
       if (element == null) {
         elementWriter.setNull();
       } else {
@@ -232,7 +236,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
 
   public void setLongObjectArray(Long[] value) {
     for (int i = 0; i < value.length; i++) {
-      Long element = value[i];
+      final Long element = value[i];
       if (element == null) {
         elementWriter.setNull();
       } else {
@@ -255,7 +259,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
 
   public void setDoubleObjectArray(Double[] value) {
     for (int i = 0; i < value.length; i++) {
-      Double element = value[i];
+      final Double element = value[i];
       if (element == null) {
         elementWriter.setNull();
       } else {