You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/09/17 22:12:16 UTC
svn commit: r1524203 - in /hive/branches/branch-0.12:
ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/
ql/src/test/queries/clientpositive/ ql/...
Author: thejas
Date: Tue Sep 17 20:12:16 2013
New Revision: 1524203
URL: http://svn.apache.org/r1524203
Log:
HIVE-5161 : Additional SerDe support for varchar type (Jason Dere via Ashutosh Chauhan)
Added:
hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q
hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out
Modified:
hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
Modified: hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/branches/branch-0.12/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Sep 17 20:12:16 2013
@@ -7003,6 +7003,10 @@ public final class OrcProto {
java.util.List<String> getFieldNamesList();
int getFieldNamesCount();
String getFieldNames(int index);
+
+ // optional uint32 maximumLength = 4;
+ boolean hasMaximumLength();
+ int getMaximumLength();
}
public static final class Type extends
com.google.protobuf.GeneratedMessage
@@ -7050,6 +7054,7 @@ public final class OrcProto {
UNION(13, 13),
DECIMAL(14, 14),
DATE(15, 15),
+ VARCHAR(16, 16),
;
public static final int BOOLEAN_VALUE = 0;
@@ -7068,6 +7073,7 @@ public final class OrcProto {
public static final int UNION_VALUE = 13;
public static final int DECIMAL_VALUE = 14;
public static final int DATE_VALUE = 15;
+ public static final int VARCHAR_VALUE = 16;
public final int getNumber() { return value; }
@@ -7090,6 +7096,7 @@ public final class OrcProto {
case 13: return UNION;
case 14: return DECIMAL;
case 15: return DATE;
+ case 16: return VARCHAR;
default: return null;
}
}
@@ -7120,7 +7127,7 @@ public final class OrcProto {
}
private static final Kind[] VALUES = {
- BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, DECIMAL, DATE,
+ BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, DECIMAL, DATE, VARCHAR,
};
public static Kind valueOf(
@@ -7183,10 +7190,21 @@ public final class OrcProto {
return fieldNames_.get(index);
}
+ // optional uint32 maximumLength = 4;
+ // Message-side accessors for maximumLength. In the built message the presence
+ // flag is bit 0x00000002 of bitField0_; the Builder tracks the same field in
+ // bit 0x00000008, and the builder-to-message copy maps 0x00000008 onto
+ // 0x00000002. The differing masks are therefore intentional.
+ public static final int MAXIMUMLENGTH_FIELD_NUMBER = 4;
+ private int maximumLength_;
+ // True when maximumLength was explicitly set (presence bit 0x00000002).
+ public boolean hasMaximumLength() {
+ return ((bitField0_ & 0x00000002) == 0x00000002);
+ }
+ // Returns the stored value. initFields() assigns 0, so use hasMaximumLength()
+ // to distinguish an explicit 0 from an absent field.
+ public int getMaximumLength() {
+ return maximumLength_;
+ }
+
private void initFields() {
kind_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind.BOOLEAN;
subtypes_ = java.util.Collections.emptyList();;
fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
+ maximumLength_ = 0;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -7217,6 +7235,9 @@ public final class OrcProto {
for (int i = 0; i < fieldNames_.size(); i++) {
output.writeBytes(3, fieldNames_.getByteString(i));
}
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ output.writeUInt32(4, maximumLength_);
+ }
getUnknownFields().writeTo(output);
}
@@ -7253,6 +7274,10 @@ public final class OrcProto {
size += dataSize;
size += 1 * getFieldNamesList().size();
}
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeUInt32Size(4, maximumLength_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -7383,6 +7408,8 @@ public final class OrcProto {
bitField0_ = (bitField0_ & ~0x00000002);
fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
bitField0_ = (bitField0_ & ~0x00000004);
+ maximumLength_ = 0;
+ bitField0_ = (bitField0_ & ~0x00000008);
return this;
}
@@ -7436,6 +7463,10 @@ public final class OrcProto {
bitField0_ = (bitField0_ & ~0x00000004);
}
result.fieldNames_ = fieldNames_;
+ if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
+ to_bitField0_ |= 0x00000002;
+ }
+ result.maximumLength_ = maximumLength_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -7475,6 +7506,9 @@ public final class OrcProto {
}
onChanged();
}
+ if (other.hasMaximumLength()) {
+ setMaximumLength(other.getMaximumLength());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -7540,6 +7574,11 @@ public final class OrcProto {
fieldNames_.add(input.readBytes());
break;
}
+ case 32: {
+ bitField0_ |= 0x00000008;
+ maximumLength_ = input.readUInt32();
+ break;
+ }
}
}
}
@@ -7671,6 +7710,27 @@ public final class OrcProto {
onChanged();
}
+ // optional uint32 maximumLength = 4;
+ // Builder-side state for maximumLength. Presence is tracked in bit 0x00000008
+ // of the builder's bitField0_ (the built message uses a different bit).
+ private int maximumLength_ ;
+ // True when maximumLength has been set on this builder.
+ public boolean hasMaximumLength() {
+ return ((bitField0_ & 0x00000008) == 0x00000008);
+ }
+ // Returns the value currently held by the builder.
+ public int getMaximumLength() {
+ return maximumLength_;
+ }
+ // Stores the value, marks the field as present, and notifies via onChanged().
+ // Returns this builder so calls can be chained.
+ public Builder setMaximumLength(int value) {
+ bitField0_ |= 0x00000008;
+ maximumLength_ = value;
+ onChanged();
+ return this;
+ }
+ // Clears the presence bit and resets the value to 0, then notifies via
+ // onChanged(). Returns this builder so calls can be chained.
+ public Builder clearMaximumLength() {
+ bitField0_ = (bitField0_ & ~0x00000008);
+ maximumLength_ = 0;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.Type)
}
@@ -11132,33 +11192,34 @@ public final class OrcProto {
"treams\030\001 \003(\0132(.org.apache.hadoop.hive.ql" +
".io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org.ap",
"ache.hadoop.hive.ql.io.orc.ColumnEncodin" +
- "g\"\250\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" +
+ "g\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" +
"doop.hive.ql.io.orc.Type.Kind\022\024\n\010subtype" +
- "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\"\272\001\n\004Kind" +
- "\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003IN" +
- "T\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006" +
- "STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004L" +
- "IST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n" +
- "\007DECIMAL\020\016\022\010\n\004DATE\020\017\"x\n\021StripeInformatio" +
- "n\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022",
- "\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004" +
- "\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataIt" +
- "em\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Foot" +
- "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
- "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
- "doop.hive.ql.io.orc.StripeInformation\0225\n" +
- "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql" +
- ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
- "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
- "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007",
- " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
- "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
- "\"\255\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
- "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
- "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
- "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001" +
- "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" +
- "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+ "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaxim" +
+ "umLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004" +
+ "BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005" +
+ "FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINAR" +
+ "Y\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n" +
+ "\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DAT" +
+ "E\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n",
+ "\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\nda" +
+ "taLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014" +
+ "numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014" +
+ "\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024" +
+ "\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 " +
+ "\001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.hadoop" +
+ ".hive.ql.io.orc.StripeInformation\0225\n\005typ" +
+ "es\030\004 \003(\0132&.org.apache.hadoop.hive.ql.io." +
+ "orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apache." +
+ "hadoop.hive.ql.io.orc.UserMetadataItem\022\024",
+ "\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\013" +
+ "22.org.apache.hadoop.hive.ql.io.orc.Colu" +
+ "mnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n" +
+ "\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013com" +
+ "pression\030\002 \001(\01621.org.apache.hadoop.hive." +
+ "ql.io.orc.CompressionKind\022\034\n\024compression" +
+ "BlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005" +
+ "magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020" +
+ "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -11266,7 +11327,7 @@ public final class OrcProto {
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor,
- new java.lang.String[] { "Kind", "Subtypes", "FieldNames", },
+ new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", },
org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.class,
org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Builder.class);
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor =
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Sep 17 20:12:16 2013
@@ -714,6 +714,7 @@ class ColumnStatisticsImpl implements Co
case DOUBLE:
return new DoubleStatisticsImpl();
case STRING:
+ case VARCHAR:
return new StringStatisticsImpl();
case DECIMAL:
return new DecimalStatisticsImpl();
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Tue Sep 17 20:12:16 2013
@@ -33,13 +33,17 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
import org.apache.hadoop.io.Writable;
final class OrcStruct implements Writable {
@@ -473,6 +477,15 @@ final class OrcStruct implements Writabl
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case VARCHAR:
+ // For varchar we need to retrieve the string length from the TypeInfo.
+ VarcharTypeParams varcharParams = (VarcharTypeParams)
+ ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(info);
+ if (varcharParams == null) {
+ throw new IllegalArgumentException("varchar type used without type params");
+ }
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ (PrimitiveTypeInfo) info);
case TIMESTAMP:
return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
case DATE:
@@ -519,6 +532,16 @@ final class OrcStruct implements Writabl
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case VARCHAR:
+ if (!type.hasMaximumLength()) {
+ throw new UnsupportedOperationException(
+ "Illegal use of varchar type without length in ORC type definition.");
+ }
+ VarcharTypeParams varcharParams = new VarcharTypeParams();
+ varcharParams.setLength(type.getMaximumLength());
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+ PrimitiveCategory.VARCHAR, varcharParams));
case TIMESTAMP:
return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
case DATE:
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Sep 17 20:12:16 2013
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.io.sarg
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
@@ -1075,6 +1076,34 @@ class RecordReaderImpl implements Record
}
}
+ /**
+ * Tree reader for VARCHAR columns. The value itself is read by the inherited
+ * StringTreeReader logic; this class wraps it in a HiveVarcharWritable and
+ * enforces the maximum length supplied at construction time.
+ */
+ private static class VarcharTreeReader extends StringTreeReader {
+ // Maximum length enforced on every value returned by next().
+ int maxLength;
+
+ /**
+ * @param path passed through to StringTreeReader
+ * @param columnId passed through to StringTreeReader
+ * @param maxLength maximum length to enforce on values that are read
+ */
+ VarcharTreeReader(Path path, int columnId, int maxLength) {
+ super(path, columnId);
+ this.maxLength = maxLength;
+ }
+
+ /**
+ * Reads the next value into a HiveVarcharWritable.
+ * @param previous object to reuse; must be null or a HiveVarcharWritable
+ * (anything else fails the cast below)
+ * @return the populated writable, or null when the string reader returns null
+ */
+ @Override
+ Object next(Object previous) throws IOException {
+ HiveVarcharWritable result = null;
+ if (previous == null) {
+ result = new HiveVarcharWritable();
+ } else {
+ result = (HiveVarcharWritable) previous;
+ }
+ // Use the string reader implementation to populate the internal Text value.
+ // The returned object is only checked for null; the data is expected to have
+ // been written into result's own Text instance.
+ Object textVal = super.next(result.getTextValue());
+ if (textVal == null) {
+ return null;
+ }
+ // result should now hold the value that was read in.
+ // Enforce the varchar length before handing the value back.
+ result.enforceMaxLength(maxLength);
+ return result;
+ }
+ }
+
private static class StructTreeReader extends TreeReader {
private final TreeReader[] fields;
private final String[] fieldNames;
@@ -1426,6 +1455,11 @@ class RecordReaderImpl implements Record
return new LongTreeReader(path, columnId);
case STRING:
return new StringTreeReader(path, columnId);
+ case VARCHAR:
+ if (!type.hasMaximumLength()) {
+ throw new IllegalArgumentException("ORC varchar type has no length specified");
+ }
+ return new VarcharTreeReader(path, columnId, type.getMaximumLength());
case BINARY:
return new BinaryTreeReader(path, columnId);
case TIMESTAMP:
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Sep 17 20:12:16 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -54,11 +55,14 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
@@ -876,12 +880,21 @@ class WriterImpl implements Writer, Memo
defaultFloatVal);
}
+ /**
+ * Extracts the String to write from a row value by treating this writer's
+ * inspector as a StringObjectInspector. It is a separate, non-private method
+ * so that subclasses whose inspector is of a different type (for example
+ * VarcharTreeWriter) can override how the string is obtained.
+ * @param obj value to convert; write() only calls this for non-null values
+ * @return String value from obj
+ */
+ String getStringValue(Object obj) {
+ return ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
+ }
+
@Override
void write(Object obj) throws IOException {
super.write(obj);
if (obj != null) {
- String val = ((StringObjectInspector) inspector)
- .getPrimitiveJavaObject(obj);
+ String val = getStringValue(obj);
rows.add(dictionary.add(val));
indexStatistics.updateString(val);
}
@@ -1014,6 +1027,28 @@ class WriterImpl implements Writer, Memo
}
}
+ /**
+ * Tree writer for VARCHAR columns. Under the covers, varchar is written to ORC
+ * the same way as string: everything is inherited from StringTreeWriter, and
+ * only the extraction of the String from the row value differs.
+ */
+ private static class VarcharTreeWriter extends StringTreeWriter {
+
+ /**
+ * All arguments are passed unchanged to the StringTreeWriter constructor.
+ */
+ VarcharTreeWriter(int columnId,
+ ObjectInspector inspector,
+ StreamFactory writer,
+ boolean nullable) throws IOException {
+ super(columnId, inspector, writer, nullable);
+ }
+
+ /**
+ * Override base class implementation to support varchar values: the inspector
+ * is treated as a HiveVarcharObjectInspector and the String is taken from the
+ * object it returns via getValue().
+ * @param obj value to convert
+ * @return String value from obj
+ */
+ @Override
+ String getStringValue(Object obj) {
+ return (((HiveVarcharObjectInspector) inspector)
+ .getPrimitiveJavaObject(obj)).getValue();
+ }
+ }
+
private static class BinaryTreeWriter extends TreeWriter {
private final PositionedOutputStream stream;
private final IntegerWriter length;
@@ -1500,6 +1535,9 @@ class WriterImpl implements Writer, Memo
case STRING:
return new StringTreeWriter(streamFactory.getNextColumnId(),
inspector, streamFactory, nullable);
+ case VARCHAR:
+ return new VarcharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, streamFactory, nullable);
case BINARY:
return new BinaryTreeWriter(streamFactory.getNextColumnId(),
inspector, streamFactory, nullable);
@@ -1565,6 +1603,18 @@ class WriterImpl implements Writer, Memo
case STRING:
type.setKind(OrcProto.Type.Kind.STRING);
break;
+ case VARCHAR:
+ // The varchar length needs to be written to file and should be available
+ // from the object inspector
+ VarcharTypeParams varcharParams = (VarcharTypeParams)
+ ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(
+ (PrimitiveObjectInspector) treeWriter.inspector);
+ if (varcharParams == null) {
+ throw new IllegalArgumentException("No varchar length specified in ORC type");
+ }
+ type.setKind(Type.Kind.VARCHAR);
+ type.setMaximumLength(varcharParams.getLength());
+ break;
case BINARY:
type.setKind(OrcProto.Type.Kind.BINARY);
break;
Modified: hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/branches/branch-0.12/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Tue Sep 17 20:12:16 2013
@@ -103,10 +103,12 @@ message Type {
UNION = 13;
DECIMAL = 14;
DATE = 15;
+ VARCHAR = 16;
}
required Kind kind = 1;
repeated uint32 subtypes = 2 [packed=true];
repeated string fieldNames = 3;
+ optional uint32 maximumLength = 4;
}
message StripeInformation {
Added: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q?rev=1524203&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q (added)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/varchar_serde.q Tue Sep 17 20:12:16 2013
@@ -0,0 +1,102 @@
+drop table if exists varchar_serde_regex;
+drop table if exists varchar_serde_lb;
+drop table if exists varchar_serde_ls;
+drop table if exists varchar_serde_c;
+drop table if exists varchar_serde_lbc;
+drop table if exists varchar_serde_orc;
+
+--
+-- RegexSerDe
+--
+create table varchar_serde_regex (
+ key varchar(10),
+ value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile;
+
+load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex;
+
+select * from varchar_serde_regex limit 5;
+select value, count(*) from varchar_serde_regex group by value limit 5;
+
+--
+-- LazyBinary
+--
+create table varchar_serde_lb (
+ key varchar(10),
+ value varchar(20)
+);
+alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe';
+
+insert overwrite table varchar_serde_lb
+ select key, value from varchar_serde_regex;
+select * from varchar_serde_lb limit 5;
+select value, count(*) from varchar_serde_lb group by value limit 5;
+
+--
+-- LazySimple
+--
+create table varchar_serde_ls (
+ key varchar(10),
+ value varchar(20)
+);
+alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
+
+insert overwrite table varchar_serde_ls
+ select key, value from varchar_serde_lb;
+select * from varchar_serde_ls limit 5;
+select value, count(*) from varchar_serde_ls group by value limit 5;
+
+--
+-- Columnar
+--
+create table varchar_serde_c (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile;
+alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+
+insert overwrite table varchar_serde_c
+ select key, value from varchar_serde_ls;
+select * from varchar_serde_c limit 5;
+select value, count(*) from varchar_serde_c group by value limit 5;
+
+--
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile;
+alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+
+insert overwrite table varchar_serde_lbc
+ select key, value from varchar_serde_c;
+select * from varchar_serde_lbc limit 5;
+select value, count(*) from varchar_serde_lbc group by value limit 5;
+
+--
+-- ORC
+--
+create table varchar_serde_orc (
+ key varchar(10),
+ value varchar(20)
+) stored as orc;
+alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde';
+
+
+insert overwrite table varchar_serde_orc
+ select key, value from varchar_serde_lbc;
+select * from varchar_serde_orc limit 5;
+select value, count(*) from varchar_serde_orc group by value limit 5;
+
+drop table if exists varchar_serde_regex;
+drop table if exists varchar_serde_lb;
+drop table if exists varchar_serde_ls;
+drop table if exists varchar_serde_c;
+drop table if exists varchar_serde_lbc;
+drop table if exists varchar_serde_orc;
Added: hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out?rev=1524203&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out (added)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/varchar_serde.q.out Tue Sep 17 20:12:16 2013
@@ -0,0 +1,626 @@
+PREHOOK: query: drop table if exists varchar_serde_regex
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_regex
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_lb
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_lb
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_ls
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_ls
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_lbc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_lbc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchar_serde_orc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchar_serde_orc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: --
+-- RegexSerDe
+--
+create table varchar_serde_regex (
+ key varchar(10),
+ value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- RegexSerDe
+--
+create table varchar_serde_regex (
+ key varchar(10),
+ value varchar(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_regex
+PREHOOK: query: load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex
+PREHOOK: type: LOAD
+PREHOOK: Output: default@varchar_serde_regex
+POSTHOOK: query: load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar_serde_regex
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@varchar_serde_regex
+PREHOOK: query: select * from varchar_serde_regex limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_regex limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_regex group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_regex group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+#### A masked pattern was here ####
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazyBinary
+--
+create table varchar_serde_lb (
+ key varchar(10),
+ value varchar(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinary
+--
+create table varchar_serde_lb (
+ key varchar(10),
+ value varchar(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_lb
+PREHOOK: query: alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: alter table varchar_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_lb
+PREHOOK: query: insert overwrite table varchar_serde_lb
+ select key, value from varchar_serde_regex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_regex
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: insert overwrite table varchar_serde_lb
+ select key, value from varchar_serde_regex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_regex
+POSTHOOK: Output: default@varchar_serde_lb
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_lb limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_lb limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_lb group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_lb group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazySimple
+--
+create table varchar_serde_ls (
+ key varchar(10),
+ value varchar(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazySimple
+--
+create table varchar_serde_ls (
+ key varchar(10),
+ value varchar(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: alter table varchar_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_ls
+ select key, value from varchar_serde_lb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: insert overwrite table varchar_serde_ls
+ select key, value from varchar_serde_lb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_ls limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_ls limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_ls group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_ls group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- Columnar
+--
+create table varchar_serde_c (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- Columnar
+--
+create table varchar_serde_c (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: alter table varchar_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_c
+ select key, value from varchar_serde_ls
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: insert overwrite table varchar_serde_c
+ select key, value from varchar_serde_ls
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_c limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_c limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_c group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_c group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table varchar_serde_lbc (
+ key varchar(10),
+ value varchar(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: alter table varchar_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_lbc
+ select key, value from varchar_serde_c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: insert overwrite table varchar_serde_lbc
+ select key, value from varchar_serde_c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_lbc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_lbc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_lbc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_lbc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- ORC
+--
+create table varchar_serde_orc (
+ key varchar(10),
+ value varchar(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- ORC
+--
+create table varchar_serde_orc (
+ key varchar(10),
+ value varchar(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@varchar_serde_orc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: alter table varchar_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@varchar_serde_orc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table varchar_serde_orc
+ select key, value from varchar_serde_lbc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: insert overwrite table varchar_serde_orc
+ select key, value from varchar_serde_lbc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: select * from varchar_serde_orc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_serde_orc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from varchar_serde_orc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from varchar_serde_orc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: drop table if exists varchar_serde_regex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_regex
+PREHOOK: Output: default@varchar_serde_regex
+POSTHOOK: query: drop table if exists varchar_serde_regex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_regex
+POSTHOOK: Output: default@varchar_serde_regex
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_lb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_lb
+PREHOOK: Output: default@varchar_serde_lb
+POSTHOOK: query: drop table if exists varchar_serde_lb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_lb
+POSTHOOK: Output: default@varchar_serde_lb
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_ls
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_ls
+PREHOOK: Output: default@varchar_serde_ls
+POSTHOOK: query: drop table if exists varchar_serde_ls
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_ls
+POSTHOOK: Output: default@varchar_serde_ls
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_c
+PREHOOK: Output: default@varchar_serde_c
+POSTHOOK: query: drop table if exists varchar_serde_c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_c
+POSTHOOK: Output: default@varchar_serde_c
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_lbc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_lbc
+PREHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: query: drop table if exists varchar_serde_lbc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_lbc
+POSTHOOK: Output: default@varchar_serde_lbc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists varchar_serde_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_serde_orc
+PREHOOK: Output: default@varchar_serde_orc
+POSTHOOK: query: drop table if exists varchar_serde_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_serde_orc
+POSTHOOK: Output: default@varchar_serde_orc
+POSTHOOK: Lineage: varchar_serde_c.key SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_c.value SIMPLE [(varchar_serde_ls)varchar_serde_ls.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.key SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lb.value SIMPLE [(varchar_serde_regex)varchar_serde_regex.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.key SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_lbc.value SIMPLE [(varchar_serde_c)varchar_serde_c.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.key SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_ls.value SIMPLE [(varchar_serde_lb)varchar_serde_lb.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.key SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:key, type:varchar(10), comment:from deserializer), ]
+POSTHOOK: Lineage: varchar_serde_orc.value SIMPLE [(varchar_serde_lbc)varchar_serde_lbc.FieldSchema(name:value, type:varchar(20), comment:from deserializer), ]
Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java Tue Sep 17 20:12:16 2013
@@ -30,13 +30,19 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -119,7 +125,8 @@ public class RegexSerDe extends Abstract
*/
List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
for (int c = 0; c < numColumns; c++) {
- String typeName = columnTypes.get(c).getTypeName();
+ TypeInfo typeInfo = columnTypes.get(c);
+ String typeName = typeInfo.getTypeName();
if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
} else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) {
@@ -142,6 +149,13 @@ public class RegexSerDe extends Abstract
columnOIs.add(PrimitiveObjectInspectorFactory.javaDateObjectInspector);
} else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
columnOIs.add(PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector);
+ } else if (typeInfo instanceof PrimitiveTypeInfo
+ &&
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
+ VarcharTypeParams varcharParams = (VarcharTypeParams)
+ ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo);
+ columnOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ (PrimitiveTypeInfo) typeInfo));
} else {
throw new SerDeException(getClass().getName()
+ " doesn't allow column [" + c + "] named "
@@ -202,7 +216,8 @@ public class RegexSerDe extends Abstract
for (int c = 0; c < numColumns; c++) {
try {
String t = m.group(c+1);
- String typeName = columnTypes.get(c).getTypeName();
+ TypeInfo typeInfo = columnTypes.get(c);
+ String typeName = typeInfo.getTypeName();
// Convert the column to the correct type when needed and set in row obj
if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
@@ -247,6 +262,13 @@ public class RegexSerDe extends Abstract
HiveDecimal bd;
bd = new HiveDecimal(t);
row.set(c, bd);
+ } else if (typeInfo instanceof PrimitiveTypeInfo
+ &&
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
+ VarcharTypeParams varcharParams = (VarcharTypeParams)
+ ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo);
+ HiveVarchar hv = new HiveVarchar(t, varcharParams != null ? varcharParams.length : -1);
+ row.set(c, hv);
}
} catch (RuntimeException e) {
partialMatchedRowsCount++;
Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java Tue Sep 17 20:12:16 2013
@@ -703,12 +703,9 @@ public class BinarySortableSerDe extends
case VARCHAR: {
HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector)poi;
HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
- try {
- ByteBuffer bb = Text.encode(hc.getHiveVarchar().getValue());
- serializeBytes(buffer, bb.array(), bb.limit(), invert);
- } catch (CharacterCodingException err) {
- throw new SerDeException(err);
- }
+ // use varchar's text field directly
+ Text t = hc.getTextValue();
+ serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
return;
}
Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Tue Sep 17 20:12:16 2013
@@ -229,8 +229,9 @@ public final class LazyUtils {
case VARCHAR: {
HiveVarcharWritable hc = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(o);
- ByteBuffer b = Text.encode(hc.toString());
- writeEscaped(out, b.array(), 0, b.limit(), escaped, escapeChar, needsEscape);
+ Text t = hc.getTextValue();
+ writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar,
+ needsEscape);
break;
}
case BINARY: {
Modified: hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1524203&r1=1524202&r2=1524203&view=diff
==============================================================================
--- hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/branches/branch-0.12/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Sep 17 20:12:16 2013
@@ -270,6 +270,17 @@ public class LazyBinarySerDe extends Abs
return warnedOnceNullMapKey;
}
+ private static void serializeText(Output byteStream, Text t, boolean skipLengthPrefix) {
+ /* write the byte length of the text as a vint, unless the caller asked to skip the length prefix */
+ int length = t.getLength();
+ if (!skipLengthPrefix) {
+ LazyBinaryUtils.writeVInt(byteStream, length);
+ }
+ /* write the text bytes: only the first getLength() bytes of the getBytes() array are valid data */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ }
+
/**
* A recursive function that serialize an object to a byte buffer based on its
* object inspector.
@@ -358,32 +369,13 @@ public class LazyBinarySerDe extends Abs
case STRING: {
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(obj);
- /* write byte size of the string which is a vint */
- int length = t.getLength();
- if (!skipLengthPrefix) {
- LazyBinaryUtils.writeVInt(byteStream, length);
- }
- /* write string itself */
- byte[] data = t.getBytes();
- byteStream.write(data, 0, length);
+ serializeText(byteStream, t, skipLengthPrefix);
return warnedOnceNullMapKey;
}
case VARCHAR: {
HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
- String value =
- hcoi.getPrimitiveWritableObject(obj).getHiveVarchar().getValue();
- int length = value.length();
- // Write byte size
- if (!skipLengthPrefix) {
- LazyBinaryUtils.writeVInt(byteStream, length);
- }
- // Write string value
- try {
- ByteBuffer bb = Text.encode(value);
- byteStream.write(bb.array(), 0, bb.limit());
- } catch (CharacterCodingException err) {
- throw new SerDeException(err);
- }
+ Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
+ serializeText(byteStream, t, skipLengthPrefix);
return warnedOnceNullMapKey;
}
case BINARY: {