You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/09/17 22:12:16 UTC

svn commit: r1524203 - in /hive/branches/branch-0.12: ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/queries/clientpositive/ ql/...

Author: thejas
Date: Tue Sep 17 20:12:16 2013
New Revision: 1524203

URL: http://svn.apache.org/r1524203
Log:
HIVE-5161 : Additional SerDe support for varchar type (Jason Dere via Ashutosh Chauhan)

Added:
    hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q
    hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out
Modified:
    hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
    hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
    hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
    hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
    hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java

Modified: hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Sep 17 20:12:16 2013
@@ -7003,6 +7003,10 @@ public final class OrcProto {
     java.util.List<String> getFieldNamesList();
     int getFieldNamesCount();
     String getFieldNames(int index);
+    
+    // optional uint32 maximumLength = 4;
+    boolean hasMaximumLength();
+    int getMaximumLength();
   }
   public static final class Type extends
       com.google.protobuf.GeneratedMessage
@@ -7050,6 +7054,7 @@ public final class OrcProto {
       UNION(13, 13),
       DECIMAL(14, 14),
       DATE(15, 15),
+      VARCHAR(16, 16),
       ;
       
       public static final int BOOLEAN_VALUE = 0;
@@ -7068,6 +7073,7 @@ public final class OrcProto {
       public static final int UNION_VALUE = 13;
       public static final int DECIMAL_VALUE = 14;
       public static final int DATE_VALUE = 15;
+      public static final int VARCHAR_VALUE = 16;
       
       
       public final int getNumber() { return value; }
@@ -7090,6 +7096,7 @@ public final class OrcProto {
           case 13: return UNION;
           case 14: return DECIMAL;
           case 15: return DATE;
+          case 16: return VARCHAR;
           default: return null;
         }
       }
@@ -7120,7 +7127,7 @@ public final class OrcProto {
       }
       
       private static final Kind[] VALUES = {
-        BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, DECIMAL, DATE, 
+        BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, DECIMAL, DATE, VARCHAR, 
       };
       
       public static Kind valueOf(
@@ -7183,10 +7190,21 @@ public final class OrcProto {
       return fieldNames_.get(index);
     }
     
+    // optional uint32 maximumLength = 4;
+    public static final int MAXIMUMLENGTH_FIELD_NUMBER = 4;
+    private int maximumLength_;
+    public boolean hasMaximumLength() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    public int getMaximumLength() {
+      return maximumLength_;
+    }
+    
     private void initFields() {
       kind_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind.BOOLEAN;
       subtypes_ = java.util.Collections.emptyList();;
       fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
+      maximumLength_ = 0;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -7217,6 +7235,9 @@ public final class OrcProto {
       for (int i = 0; i < fieldNames_.size(); i++) {
         output.writeBytes(3, fieldNames_.getByteString(i));
       }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        output.writeUInt32(4, maximumLength_);
+      }
       getUnknownFields().writeTo(output);
     }
     
@@ -7253,6 +7274,10 @@ public final class OrcProto {
         size += dataSize;
         size += 1 * getFieldNamesList().size();
       }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeUInt32Size(4, maximumLength_);
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -7383,6 +7408,8 @@ public final class OrcProto {
         bitField0_ = (bitField0_ & ~0x00000002);
         fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
         bitField0_ = (bitField0_ & ~0x00000004);
+        maximumLength_ = 0;
+        bitField0_ = (bitField0_ & ~0x00000008);
         return this;
       }
       
@@ -7436,6 +7463,10 @@ public final class OrcProto {
           bitField0_ = (bitField0_ & ~0x00000004);
         }
         result.fieldNames_ = fieldNames_;
+        if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
+          to_bitField0_ |= 0x00000002;
+        }
+        result.maximumLength_ = maximumLength_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -7475,6 +7506,9 @@ public final class OrcProto {
           }
           onChanged();
         }
+        if (other.hasMaximumLength()) {
+          setMaximumLength(other.getMaximumLength());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -7540,6 +7574,11 @@ public final class OrcProto {
               fieldNames_.add(input.readBytes());
               break;
             }
+            case 32: {
+              bitField0_ |= 0x00000008;
+              maximumLength_ = input.readUInt32();
+              break;
+            }
           }
         }
       }
@@ -7671,6 +7710,27 @@ public final class OrcProto {
         onChanged();
       }
       
+      // optional uint32 maximumLength = 4;
+      private int maximumLength_ ;
+      public boolean hasMaximumLength() {
+        return ((bitField0_ & 0x00000008) == 0x00000008);
+      }
+      public int getMaximumLength() {
+        return maximumLength_;
+      }
+      public Builder setMaximumLength(int value) {
+        bitField0_ |= 0x00000008;
+        maximumLength_ = value;
+        onChanged();
+        return this;
+      }
+      public Builder clearMaximumLength() {
+        bitField0_ = (bitField0_ & ~0x00000008);
+        maximumLength_ = 0;
+        onChanged();
+        return this;
+      }
+      
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.Type)
     }
     
@@ -11132,33 +11192,34 @@ public final class OrcProto {
       "treams\030\001 \003(\0132(.org.apache.hadoop.hive.ql" +
       ".io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org.ap",
       "ache.hadoop.hive.ql.io.orc.ColumnEncodin" +
-      "g\"\250\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" +
+      "g\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" +
       "doop.hive.ql.io.orc.Type.Kind\022\024\n\010subtype" +
-      "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\"\272\001\n\004Kind" +
-      "\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003IN" +
-      "T\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006" +
-      "STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004L" +
-      "IST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n" +
-      "\007DECIMAL\020\016\022\010\n\004DATE\020\017\"x\n\021StripeInformatio" +
-      "n\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022",
-      "\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004" +
-      "\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataIt" +
-      "em\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Foot" +
-      "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
-      "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
-      "doop.hive.ql.io.orc.StripeInformation\0225\n" +
-      "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql" +
-      ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
-      "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
-      "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007",
-      " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
-      "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
-      "\"\255\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
-      "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
-      "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
-      "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001" +
-      "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" +
-      "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+      "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaxim" +
+      "umLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004" +
+      "BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005" +
+      "FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINAR" +
+      "Y\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n" +
+      "\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DAT" +
+      "E\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n",
+      "\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\nda" +
+      "taLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014" +
+      "numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014" +
+      "\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024" +
+      "\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 " +
+      "\001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.hadoop" +
+      ".hive.ql.io.orc.StripeInformation\0225\n\005typ" +
+      "es\030\004 \003(\0132&.org.apache.hadoop.hive.ql.io." +
+      "orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apache." +
+      "hadoop.hive.ql.io.orc.UserMetadataItem\022\024",
+      "\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\013" +
+      "22.org.apache.hadoop.hive.ql.io.orc.Colu" +
+      "mnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n" +
+      "\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013com" +
+      "pression\030\002 \001(\01621.org.apache.hadoop.hive." +
+      "ql.io.orc.CompressionKind\022\034\n\024compression" +
+      "BlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005" +
+      "magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020" +
+      "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -11266,7 +11327,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor,
-              new java.lang.String[] { "Kind", "Subtypes", "FieldNames", },
+              new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", },
               org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.class,
               org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Builder.class);
           internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor =

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Sep 17 20:12:16 2013
@@ -714,6 +714,7 @@ class ColumnStatisticsImpl implements Co
           case DOUBLE:
             return new DoubleStatisticsImpl();
           case STRING:
+          case VARCHAR:
             return new StringStatisticsImpl();
           case DECIMAL:
             return new DecimalStatisticsImpl();

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Tue Sep 17 20:12:16 2013
@@ -33,13 +33,17 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.Writable;
 
 final class OrcStruct implements Writable {
@@ -473,6 +477,15 @@ final class OrcStruct implements Writabl
             return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
           case STRING:
             return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+          case VARCHAR:
+            // For varchar we need to retrieve the string length from the TypeInfo.
+            VarcharTypeParams varcharParams = (VarcharTypeParams)
+                ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(info);
+            if (varcharParams == null) {
+              throw new IllegalArgumentException("varchar type used without type params");
+            }
+            return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+                (PrimitiveTypeInfo) info);
           case TIMESTAMP:
             return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
           case DATE:
@@ -519,6 +532,16 @@ final class OrcStruct implements Writabl
         return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
       case STRING:
         return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+      case VARCHAR:
+        if (!type.hasMaximumLength()) {
+          throw new UnsupportedOperationException(
+              "Illegal use of varchar type without length in ORC type definition.");
+        }
+        VarcharTypeParams varcharParams = new VarcharTypeParams();
+        varcharParams.setLength(type.getMaximumLength());
+        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+            PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+                PrimitiveCategory.VARCHAR, varcharParams));
       case TIMESTAMP:
         return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
       case DATE:

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Sep 17 20:12:16 2013
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.io.sarg
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
@@ -1075,6 +1076,34 @@ class RecordReaderImpl implements Record
     }
   }
 
+  private static class VarcharTreeReader extends StringTreeReader {
+    int maxLength;
+
+    VarcharTreeReader(Path path, int columnId, int maxLength) {
+      super(path, columnId);
+      this.maxLength = maxLength;
+    }
+
+    @Override
+    Object next(Object previous) throws IOException {
+      HiveVarcharWritable result = null;
+      if (previous == null) {
+        result = new HiveVarcharWritable();
+      } else {
+        result = (HiveVarcharWritable) previous;
+      }
+      // Use the string reader implementation to populate the internal Text value
+      Object textVal = super.next(result.getTextValue());
+      if (textVal == null) {
+        return null;
+      }
+      // result should now hold the value that was read in.
+      // enforce varchar length
+      result.enforceMaxLength(maxLength);
+      return result;
+    }
+  }
+
   private static class StructTreeReader extends TreeReader {
     private final TreeReader[] fields;
     private final String[] fieldNames;
@@ -1426,6 +1455,11 @@ class RecordReaderImpl implements Record
         return new LongTreeReader(path, columnId);
       case STRING:
         return new StringTreeReader(path, columnId);
+      case VARCHAR:
+        if (!type.hasMaximumLength()) {
+          throw new IllegalArgumentException("ORC varchar type has no length specified");
+        }
+        return new VarcharTreeReader(path, columnId, type.getMaximumLength());
       case BINARY:
         return new BinaryTreeReader(path, columnId);
       case TIMESTAMP:

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Sep 17 20:12:16 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -54,11 +55,14 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
@@ -876,12 +880,21 @@ class WriterImpl implements Writer, Memo
           defaultFloatVal);
     }
 
+    /**
+     * Method to retrieve string values from the value object, which can be overridden
+     * by subclasses.
+     * @param obj  value
+     * @return String value from obj
+     */
+    String getStringValue(Object obj) {
+      return ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
+    }
+
     @Override
     void write(Object obj) throws IOException {
       super.write(obj);
       if (obj != null) {
-        String val = ((StringObjectInspector) inspector)
-          .getPrimitiveJavaObject(obj);
+        String val = getStringValue(obj);
         rows.add(dictionary.add(val));
         indexStatistics.updateString(val);
       }
@@ -1014,6 +1027,28 @@ class WriterImpl implements Writer, Memo
     }
   }
 
+  /**
+   * Under the covers, varchar is written to ORC the same way as string.
+   */
+  private static class VarcharTreeWriter extends StringTreeWriter {
+
+    VarcharTreeWriter(int columnId,
+        ObjectInspector inspector,
+        StreamFactory writer,
+        boolean nullable) throws IOException {
+      super(columnId, inspector, writer, nullable);
+    }
+
+    /**
+     * Override base class implementation to support varchar values.
+     */
+    @Override
+    String getStringValue(Object obj) {
+      return (((HiveVarcharObjectInspector) inspector)
+          .getPrimitiveJavaObject(obj)).getValue();
+    }
+  }
+
   private static class BinaryTreeWriter extends TreeWriter {
     private final PositionedOutputStream stream;
     private final IntegerWriter length;
@@ -1500,6 +1535,9 @@ class WriterImpl implements Writer, Memo
           case STRING:
             return new StringTreeWriter(streamFactory.getNextColumnId(),
                 inspector, streamFactory, nullable);
+          case VARCHAR:
+            return new VarcharTreeWriter(streamFactory.getNextColumnId(),
+                inspector, streamFactory, nullable);
           case BINARY:
             return new BinaryTreeWriter(streamFactory.getNextColumnId(),
                 inspector, streamFactory, nullable);
@@ -1565,6 +1603,18 @@ class WriterImpl implements Writer, Memo
           case STRING:
             type.setKind(OrcProto.Type.Kind.STRING);
             break;
+          case VARCHAR:
+            // The varchar length needs to be written to file and should be available
+            // from the object inspector
+            VarcharTypeParams varcharParams = (VarcharTypeParams)
+                ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(
+                    (PrimitiveObjectInspector) treeWriter.inspector);
+            if (varcharParams == null) {
+              throw new IllegalArgumentException("No varchar length specified in ORC type");
+            }
+            type.setKind(Type.Kind.VARCHAR);
+            type.setMaximumLength(varcharParams.getLength());
+            break;
           case BINARY:
             type.setKind(OrcProto.Type.Kind.BINARY);
             break;

Modified: hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Tue Sep 17 20:12:16 2013
@@ -103,10 +103,12 @@ message Type {
     UNION = 13;
     DECIMAL = 14;
     DATE = 15;
+    VARCHAR = 16;
   }
   required Kind kind = 1;
   repeated uint32 subtypes = 2 [packed=true];
   repeated string fieldNames = 3;
+  optional uint32 maximumLength = 4;
 }
 
 message StripeInformation {

Added: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q?rev=1524203&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q (added)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q Tue Sep 17 20:12:16 2013
@@ -0,0 +1,102 @@
+drop table if exists varchar_serde_regex;
+drop table if exists varchar_serde_lb;
+drop table if exists varchar_serde_ls;
+drop table if exists varchar_serde_c;
+drop table if exists varchar_serde_lbc;
+drop table if exists varchar_serde_orc;
+
+--
+-- RegexSerDe
+--
+create table  varchar_serde_regex (
+  key varchar(10),
+  value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+  "input.regex" = "([^\001]*)\001([^\001]*)"
+)
+stored as textfile;
+
+load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex;
+
+select * from varchar_serde_regex limit 5;
+select value, count(*) from varchar_serde_regex group by value limit 5;
+
+--
+-- LazyBinary
+--
+create table  varchar_serde_lb (
+  key varchar(10),
+  value varchar(20)
+);
+alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe';
+
+insert overwrite table varchar_serde_lb
+  select key, value from varchar_serde_regex;
+select * from varchar_serde_lb limit 5;
+select value, count(*) from varchar_serde_lb group by value limit 5;
+
+--
+-- LazySimple
+--
+create table  varchar_serde_ls (
+  key varchar(10),
+  value varchar(20)
+);
+alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
+
+insert overwrite table varchar_serde_ls
+  select key, value from varchar_serde_lb;
+select * from varchar_serde_ls limit 5;
+select value, count(*) from varchar_serde_ls group by value limit 5;
+
+--
+-- Columnar
+--
+create table  varchar_serde_c (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile;
+alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+
+insert overwrite table varchar_serde_c
+  select key, value from varchar_serde_ls;
+select * from varchar_serde_c limit 5;
+select value, count(*) from varchar_serde_c group by value limit 5;
+
+--
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile;
+alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+
+insert overwrite table varchar_serde_lbc
+  select key, value from varchar_serde_c;
+select * from varchar_serde_lbc limit 5;
+select value, count(*) from varchar_serde_lbc group by value limit 5;
+
+--
+-- ORC
+--
+create table varchar_serde_orc (
+  key varchar(10),
+  value varchar(20)
+) stored as orc;
+alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde';
+
+
+insert overwrite table varchar_serde_orc
+  select key, value from varchar_serde_lbc;
+select * from varchar_serde_orc limit 5;
+select value, count(*) from varchar_serde_orc group by value limit 5;
+
+drop table if exists varchar_serde_regex;
+drop table if exists varchar_serde_lb;
+drop table if exists varchar_serde_ls;
+drop table if exists varchar_serde_c;
+drop table if exists varchar_serde_lbc;
+drop table if exists varchar_serde_orc;

Added: hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out?rev=1524203&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out (added)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out Tue Sep 17 20:12:16 2013
@@ -0,0 +1,626 @@
+PREHOOK: query: drop table if exists varchar_serde_regex
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_regex
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_lb
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_lb
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_ls
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_ls
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_lbc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_lbc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_orc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_orc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: --
+-- RegexSerDe
+--
+create table  varchar_serde_regex (
+  key varchar(10),
+  value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+  "input.regex" = "([^\001]*)\001([^\001]*)"
+)
+stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- RegexSerDe
+--
+create table  varchar_serde_regex (
+  key varchar(10),
+  value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+  "input.regex" = "([^\001]*)\001([^\001]*)"
+)
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_regex
+PREHOOK: query: load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex
+PREHOOK: type: LOAD
+PREHOOK: Output: default@varchar_serde_regex
+POSTHOOK: query: load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@varchar_serde_regex
+PREHOOK: query: select * from varchar_serde_regex limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_regex limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_regex group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_regex group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: --
+-- LazyBinary
+--
+create table  varchar_serde_lb (
+  key varchar(10),
+  value varchar(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinary
+--
+create table  varchar_serde_lb (
+  key varchar(10),
+  value varchar(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_lb
+PREHOOK: query: alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_lb
+PREHOOK: query: insert overwrite table varchar_serde_lb
+  select key, value from varchar_serde_regex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: insert overwrite table varchar_serde_lb
+  select key, value from varchar_serde_regex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+POSTHOOK: Output: default@varchar_serde_lb
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_lb limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_lb limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_lb group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_lb group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: --
+-- LazySimple
+--
+create table  varchar_serde_ls (
+  key varchar(10),
+  value varchar(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazySimple
+--
+create table  varchar_serde_ls (
+  key varchar(10),
+  value varchar(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_ls
+  select key, value from varchar_serde_lb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: insert overwrite table varchar_serde_ls
+  select key, value from varchar_serde_lb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_ls limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_ls limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_ls group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_ls group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: --
+-- Columnar
+--
+create table  varchar_serde_c (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- Columnar
+--
+create table  varchar_serde_c (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_c
+  select key, value from varchar_serde_ls
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: insert overwrite table varchar_serde_c
+  select key, value from varchar_serde_ls
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_c limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_c limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_c group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_c group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+  key varchar(10),
+  value varchar(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_lbc
+  select key, value from varchar_serde_c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: insert overwrite table varchar_serde_lbc
+  select key, value from varchar_serde_c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_lbc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_lbc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_lbc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_lbc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: --
+-- ORC
+--
+create table varchar_serde_orc (
+  key varchar(10),
+  value varchar(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- ORC
+--
+create table varchar_serde_orc (
+  key varchar(10),
+  value varchar(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_orc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_orc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_orc
+  select key, value from varchar_serde_lbc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: insert overwrite table varchar_serde_orc
+  select key, value from varchar_serde_lbc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_orc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_orc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474	val_475
+62	val_63
+468	val_469
+272	val_273
+448	val_449
+PREHOOK: query: select value, count(*) from varchar_serde_orc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_orc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0	3
+val_1	2
+val_10	1
+val_100	2
+val_101	2
+PREHOOK: query: drop table if exists varchar_serde_regex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_regex
+PREHOOK: Output: default@varchar_serde_regex
+POSTHOOK: query: drop table if exists varchar_serde_regex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_regex
+POSTHOOK: Output: default@varchar_serde_regex
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_lb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: drop table if exists varchar_serde_lb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_lb
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_ls
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: drop table if exists varchar_serde_ls
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: drop table if exists varchar_serde_c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_lbc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: drop table if exists varchar_serde_lbc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_orc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: drop table if exists varchar_serde_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_orc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]

Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java Tue Sep 17 20:12:16 2013
@@ -30,13 +30,19 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
@@ -119,7 +125,8 @@ public class RegexSerDe extends Abstract
      */
     List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
     for (int c = 0; c < numColumns; c++) {
-      String typeName = columnTypes.get(c).getTypeName();
+      TypeInfo typeInfo = columnTypes.get(c);
+      String typeName = typeInfo.getTypeName();
       if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
         columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
       } else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) {
@@ -142,6 +149,13 @@ public class RegexSerDe extends Abstract
         columnOIs.add(PrimitiveObjectInspectorFactory.javaDateObjectInspector);
       } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
         columnOIs.add(PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector);
+      }  else if (typeInfo instanceof PrimitiveTypeInfo
+          &&
+          ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
+        VarcharTypeParams varcharParams = (VarcharTypeParams)
+            ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo);
+        columnOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+            (PrimitiveTypeInfo) typeInfo));
       } else {
          throw new SerDeException(getClass().getName()
          + " doesn't allow column [" + c + "] named "
@@ -202,7 +216,8 @@ public class RegexSerDe extends Abstract
     for (int c = 0; c < numColumns; c++) {
       try {
         String t = m.group(c+1);
-        String typeName = columnTypes.get(c).getTypeName();
+        TypeInfo typeInfo = columnTypes.get(c);
+        String typeName = typeInfo.getTypeName();
 
         // Convert the column to the correct type when needed and set in row obj
         if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
@@ -247,6 +262,13 @@ public class RegexSerDe extends Abstract
           HiveDecimal bd;
           bd = new HiveDecimal(t);
           row.set(c, bd);
+        } else if (typeInfo instanceof PrimitiveTypeInfo
+            &&
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
+          VarcharTypeParams varcharParams = (VarcharTypeParams)
+              ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo);
+          HiveVarchar hv = new HiveVarchar(t, varcharParams != null ? varcharParams.length : -1);
+          row.set(c, hv);
         }
       } catch (RuntimeException e) {
          partialMatchedRowsCount++;

Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java Tue Sep 17 20:12:16 2013
@@ -703,12 +703,9 @@ public class BinarySortableSerDe extends
       case VARCHAR: {
         HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector)poi;
         HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
-        try {
-          ByteBuffer bb = Text.encode(hc.getHiveVarchar().getValue());
-          serializeBytes(buffer, bb.array(), bb.limit(), invert);
-        } catch (CharacterCodingException err) {
-          throw new SerDeException(err);
-        }
+        // use varchar's text field directly
+        Text t = hc.getTextValue();
+        serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
         return;
       }
 

Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Tue Sep 17 20:12:16 2013
@@ -229,8 +229,9 @@ public final class LazyUtils {
 
     case VARCHAR: {
       HiveVarcharWritable hc = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(o);
-      ByteBuffer b = Text.encode(hc.toString());
-      writeEscaped(out, b.array(), 0, b.limit(), escaped, escapeChar, needsEscape);
+      Text t = hc.getTextValue();
+      writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar,
+          needsEscape);
       break;
     }
     case BINARY: {

Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Sep 17 20:12:16 2013
@@ -270,6 +270,17 @@ public class LazyBinarySerDe extends Abs
     return warnedOnceNullMapKey;
   }
 
+  private static void serializeText(Output byteStream, Text t, boolean skipLengthPrefix) {
+    /* write byte size of the string which is a vint */
+    int length = t.getLength();
+    if (!skipLengthPrefix) {
+      LazyBinaryUtils.writeVInt(byteStream, length);
+    }
+    /* write string itself */
+    byte[] data = t.getBytes();
+    byteStream.write(data, 0, length);
+  }
+
   /**
    * A recursive function that serialize an object to a byte buffer based on its
    * object inspector.
@@ -358,32 +369,13 @@ public class LazyBinarySerDe extends Abs
       case STRING: {
         StringObjectInspector soi = (StringObjectInspector) poi;
         Text t = soi.getPrimitiveWritableObject(obj);
-        /* write byte size of the string which is a vint */
-        int length = t.getLength();
-        if (!skipLengthPrefix) {
-          LazyBinaryUtils.writeVInt(byteStream, length);
-        }
-        /* write string itself */
-        byte[] data = t.getBytes();
-        byteStream.write(data, 0, length);
+        serializeText(byteStream, t, skipLengthPrefix);
         return warnedOnceNullMapKey;
       }
       case VARCHAR: {
         HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
-        String value =
-            hcoi.getPrimitiveWritableObject(obj).getHiveVarchar().getValue();
-        int length = value.length();
-        // Write byte size
-        if (!skipLengthPrefix) {
-          LazyBinaryUtils.writeVInt(byteStream, length);
-        }
-        // Write string value
-        try {
-          ByteBuffer bb = Text.encode(value);
-          byteStream.write(bb.array(), 0, bb.limit());
-        } catch (CharacterCodingException err) {
-          throw new SerDeException(err);
-        }
+        Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
+        serializeText(byteStream, t, skipLengthPrefix);
         return warnedOnceNullMapKey;
       }
       case BINARY: {