You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2013/11/29 00:31:48 UTC
svn commit: r1546475 - in /hive/trunk:
ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/
ql/src/test/queries/clientpositive/
ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/
Author: xuefu
Date: Thu Nov 28 23:31:48 2013
New Revision: 1546475
URL: http://svn.apache.org/r1546475
Log:
HIVE-5684: Serde support for char (Jason via Xuefu)
Added:
hive/trunk/ql/src/test/queries/clientpositive/char_serde.q
hive/trunk/ql/src/test/results/clientpositive/char_serde.q.out
Modified:
hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Thu Nov 28 23:31:48 2013
@@ -9913,6 +9913,10 @@ public final class OrcProto {
* <code>VARCHAR = 16;</code>
*/
VARCHAR(16, 16),
+ /**
+ * <code>CHAR = 17;</code>
+ */
+ CHAR(17, 17),
;
/**
@@ -9983,6 +9987,10 @@ public final class OrcProto {
* <code>VARCHAR = 16;</code>
*/
public static final int VARCHAR_VALUE = 16;
+ /**
+ * <code>CHAR = 17;</code>
+ */
+ public static final int CHAR_VALUE = 17;
public final int getNumber() { return value; }
@@ -10006,6 +10014,7 @@ public final class OrcProto {
case 14: return DECIMAL;
case 15: return DATE;
case 16: return VARCHAR;
+ case 17: return CHAR;
default: return null;
}
}
@@ -16767,40 +16776,40 @@ public final class OrcProto {
"9\n\007streams\030\001 \003(\0132(.org.apache.hadoop.hiv" +
"e.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.or" +
"g.apache.hadoop.hive.ql.io.orc.ColumnEnc" +
- "oding\"\356\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
+ "oding\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
"e.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010sub" +
"types\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rm" +
"aximumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n" +
- "\005scale\030\006 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" +
+ "\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" +
"TE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FL",
"OAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020" +
"\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006S" +
"TRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020" +
- "\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n\006o" +
- "ffset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndata" +
- "Length\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014nu" +
- "mberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004" +
- "name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020StripeStati" +
- "stics\022D\n\010colStats\030\001 \003(\01322.org.apache.had" +
- "oop.hive.ql.io.orc.ColumnStatistics\"S\n\010M",
- "etadata\022G\n\013stripeStats\030\001 \003(\01322.org.apach" +
- "e.hadoop.hive.ql.io.orc.StripeStatistics" +
- "\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcon" +
- "tentLength\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org." +
- "apache.hadoop.hive.ql.io.orc.StripeInfor" +
- "mation\0225\n\005types\030\004 \003(\0132&.org.apache.hadoo" +
- "p.hive.ql.io.orc.Type\022D\n\010metadata\030\005 \003(\0132" +
- "2.org.apache.hadoop.hive.ql.io.orc.UserM" +
- "etadataItem\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nsta" +
- "tistics\030\007 \003(\01322.org.apache.hadoop.hive.q",
- "l.io.orc.ColumnStatistics\022\026\n\016rowIndexStr" +
- "ide\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLength" +
- "\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621.org.apache" +
- ".hadoop.hive.ql.io.orc.CompressionKind\022\034" +
- "\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007version\030" +
- "\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\016\n\005mag" +
- "ic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" +
- "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+ "\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInform" +
+ "ation\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001" +
+ "(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004" +
+ " \001(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetada" +
+ "taItem\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020S" +
+ "tripeStatistics\022D\n\010colStats\030\001 \003(\01322.org." +
+ "apache.hadoop.hive.ql.io.orc.ColumnStati",
+ "stics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\01322" +
+ ".org.apache.hadoop.hive.ql.io.orc.Stripe" +
+ "Statistics\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 " +
+ "\001(\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030\003 " +
+ "\003(\01323.org.apache.hadoop.hive.ql.io.orc.S" +
+ "tripeInformation\0225\n\005types\030\004 \003(\0132&.org.ap" +
+ "ache.hadoop.hive.ql.io.orc.Type\022D\n\010metad" +
+ "ata\030\005 \003(\01322.org.apache.hadoop.hive.ql.io" +
+ ".orc.UserMetadataItem\022\024\n\014numberOfRows\030\006 " +
+ "\001(\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.had",
+ "oop.hive.ql.io.orc.ColumnStatistics\022\026\n\016r" +
+ "owIndexStride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014fo" +
+ "oterLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621." +
+ "org.apache.hadoop.hive.ql.io.orc.Compres" +
+ "sionKind\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023" +
+ "\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 " +
+ "\001(\004\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010" +
+ "\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Thu Nov 28 23:31:48 2013
@@ -789,6 +789,7 @@ class ColumnStatisticsImpl implements Co
case DOUBLE:
return new DoubleStatisticsImpl();
case STRING:
+ case CHAR:
case VARCHAR:
return new StringStatisticsImpl();
case DECIMAL:
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Thu Nov 28 23:31:48 2013
@@ -483,6 +483,9 @@ final class OrcStruct implements Writabl
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case CHAR:
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ (PrimitiveTypeInfo) info);
case VARCHAR:
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
(PrimitiveTypeInfo) info);
@@ -533,6 +536,13 @@ final class OrcStruct implements Writabl
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case CHAR:
+ if (!type.hasMaximumLength()) {
+ throw new UnsupportedOperationException(
+ "Illegal use of char type without length in ORC type definition.");
+ }
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ TypeInfoFactory.getCharTypeInfo(type.getMaximumLength()));
case VARCHAR:
if (!type.hasMaximumLength()) {
throw new UnsupportedOperationException(
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu Nov 28 23:31:48 2013
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.io.sarg
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
@@ -1471,6 +1472,34 @@ class RecordReaderImpl implements Record
}
}
+ private static class CharTreeReader extends StringTreeReader {
+ int maxLength;
+
+ CharTreeReader(Path path, int columnId, int maxLength) {
+ super(path, columnId);
+ this.maxLength = maxLength;
+ }
+
+ @Override
+ Object next(Object previous) throws IOException {
+ HiveCharWritable result = null;
+ if (previous == null) {
+ result = new HiveCharWritable();
+ } else {
+ result = (HiveCharWritable) previous;
+ }
+ // Use the string reader implementation to populate the internal Text value
+ Object textVal = super.next(result.getTextValue());
+ if (textVal == null) {
+ return null;
+ }
+ // result should now hold the value that was read in.
+ // enforce char length
+ result.enforceMaxLength(maxLength);
+ return result;
+ }
+ }
+
private static class VarcharTreeReader extends StringTreeReader {
int maxLength;
@@ -1890,6 +1919,11 @@ class RecordReaderImpl implements Record
return new LongTreeReader(path, columnId);
case STRING:
return new StringTreeReader(path, columnId);
+ case CHAR:
+ if (!type.hasMaximumLength()) {
+ throw new IllegalArgumentException("ORC char type has no length specified");
+ }
+ return new CharTreeReader(path, columnId, type.getMaximumLength());
case VARCHAR:
if (!type.hasMaximumLength()) {
throw new IllegalArgumentException("ORC varchar type has no length specified");
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Thu Nov 28 23:31:48 2013
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
@@ -60,6 +61,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BytesWritable;
@@ -1053,6 +1055,28 @@ class WriterImpl implements Writer, Memo
}
/**
+ * Under the covers, char is written to ORC the same way as string.
+ */
+ private static class CharTreeWriter extends StringTreeWriter {
+
+ CharTreeWriter(int columnId,
+ ObjectInspector inspector,
+ StreamFactory writer,
+ boolean nullable) throws IOException {
+ super(columnId, inspector, writer, nullable);
+ }
+
+ /**
+ * Override base class implementation to support char values.
+ */
+ @Override
+ String getStringValue(Object obj) {
+ return (((HiveCharObjectInspector) inspector)
+ .getPrimitiveJavaObject(obj)).getValue();
+ }
+ }
+
+ /**
* Under the covers, varchar is written to ORC the same way as string.
*/
private static class VarcharTreeWriter extends StringTreeWriter {
@@ -1564,6 +1588,9 @@ class WriterImpl implements Writer, Memo
case STRING:
return new StringTreeWriter(streamFactory.getNextColumnId(),
inspector, streamFactory, nullable);
+ case CHAR:
+ return new CharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, streamFactory, nullable);
case VARCHAR:
return new VarcharTreeWriter(streamFactory.getNextColumnId(),
inspector, streamFactory, nullable);
@@ -1632,6 +1659,13 @@ class WriterImpl implements Writer, Memo
case STRING:
type.setKind(OrcProto.Type.Kind.STRING);
break;
+ case CHAR:
+ // The char length needs to be written to file and should be available
+ // from the object inspector
+ CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
+ type.setKind(Type.Kind.CHAR);
+ type.setMaximumLength(charTypeInfo.getLength());
+ break;
case VARCHAR:
// The varchar length needs to be written to file and should be available
// from the object inspector
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Thu Nov 28 23:31:48 2013
@@ -130,6 +130,7 @@ message Type {
DECIMAL = 14;
DATE = 15;
VARCHAR = 16;
+ CHAR = 17;
}
required Kind kind = 1;
repeated uint32 subtypes = 2 [packed=true];
Added: hive/trunk/ql/src/test/queries/clientpositive/char_serde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/char_serde.q?rev=1546475&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/char_serde.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/char_serde.q Thu Nov 28 23:31:48 2013
@@ -0,0 +1,102 @@
+drop table if exists char_serde_regex;
+drop table if exists char_serde_lb;
+drop table if exists char_serde_ls;
+drop table if exists char_serde_c;
+drop table if exists char_serde_lbc;
+drop table if exists char_serde_orc;
+
+--
+-- RegexSerDe
+--
+create table char_serde_regex (
+ key char(15),
+ value char(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile;
+
+load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex;
+
+select * from char_serde_regex limit 5;
+select value, count(*) from char_serde_regex group by value limit 5;
+
+--
+-- LazyBinary
+--
+create table char_serde_lb (
+ key char(15),
+ value char(20)
+);
+alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe';
+
+insert overwrite table char_serde_lb
+ select key, value from char_serde_regex;
+select * from char_serde_lb limit 5;
+select value, count(*) from char_serde_lb group by value limit 5;
+
+--
+-- LazySimple
+--
+create table char_serde_ls (
+ key char(15),
+ value char(20)
+);
+alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
+
+insert overwrite table char_serde_ls
+ select key, value from char_serde_lb;
+select * from char_serde_ls limit 5;
+select value, count(*) from char_serde_ls group by value limit 5;
+
+--
+-- Columnar
+--
+create table char_serde_c (
+ key char(15),
+ value char(20)
+) stored as rcfile;
+alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+
+insert overwrite table char_serde_c
+ select key, value from char_serde_ls;
+select * from char_serde_c limit 5;
+select value, count(*) from char_serde_c group by value limit 5;
+
+--
+-- LazyBinaryColumnar
+--
+create table char_serde_lbc (
+ key char(15),
+ value char(20)
+) stored as rcfile;
+alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+
+insert overwrite table char_serde_lbc
+ select key, value from char_serde_c;
+select * from char_serde_lbc limit 5;
+select value, count(*) from char_serde_lbc group by value limit 5;
+
+--
+-- ORC
+--
+create table char_serde_orc (
+ key char(15),
+ value char(20)
+) stored as orc;
+alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde';
+
+
+insert overwrite table char_serde_orc
+ select key, value from char_serde_lbc;
+select * from char_serde_orc limit 5;
+select value, count(*) from char_serde_orc group by value limit 5;
+
+drop table if exists char_serde_regex;
+drop table if exists char_serde_lb;
+drop table if exists char_serde_ls;
+drop table if exists char_serde_c;
+drop table if exists char_serde_lbc;
+drop table if exists char_serde_orc;
Added: hive/trunk/ql/src/test/results/clientpositive/char_serde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/char_serde.q.out?rev=1546475&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/char_serde.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/char_serde.q.out Thu Nov 28 23:31:48 2013
@@ -0,0 +1,626 @@
+PREHOOK: query: drop table if exists char_serde_regex
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_regex
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_serde_lb
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_lb
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_serde_ls
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_ls
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_serde_c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_serde_lbc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_lbc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists char_serde_orc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists char_serde_orc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: --
+-- RegexSerDe
+--
+create table char_serde_regex (
+ key char(15),
+ value char(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- RegexSerDe
+--
+create table char_serde_regex (
+ key char(15),
+ value char(20)
+)
+row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
+with serdeproperties (
+ "input.regex" = "([^]*)([^]*)"
+)
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_regex
+PREHOOK: query: load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex
+PREHOOK: type: LOAD
+PREHOOK: Output: default@char_serde_regex
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@char_serde_regex
+PREHOOK: query: select * from char_serde_regex limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_regex limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_regex
+#### A masked pattern was here ####
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_regex group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_regex
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_regex group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_regex
+#### A masked pattern was here ####
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazyBinary
+--
+create table char_serde_lb (
+ key char(15),
+ value char(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinary
+--
+create table char_serde_lb (
+ key char(15),
+ value char(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_lb
+PREHOOK: query: alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@char_serde_lb
+PREHOOK: Output: default@char_serde_lb
+POSTHOOK: query: alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@char_serde_lb
+POSTHOOK: Output: default@char_serde_lb
+PREHOOK: query: insert overwrite table char_serde_lb
+ select key, value from char_serde_regex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_regex
+PREHOOK: Output: default@char_serde_lb
+POSTHOOK: query: insert overwrite table char_serde_lb
+ select key, value from char_serde_regex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_regex
+POSTHOOK: Output: default@char_serde_lb
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: select * from char_serde_lb limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_lb limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_lb group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_lb group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lb
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazySimple
+--
+create table char_serde_ls (
+ key char(15),
+ value char(20)
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazySimple
+--
+create table char_serde_ls (
+ key char(15),
+ value char(20)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_ls
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@char_serde_ls
+PREHOOK: Output: default@char_serde_ls
+POSTHOOK: query: alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@char_serde_ls
+POSTHOOK: Output: default@char_serde_ls
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table char_serde_ls
+ select key, value from char_serde_lb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lb
+PREHOOK: Output: default@char_serde_ls
+POSTHOOK: query: insert overwrite table char_serde_ls
+ select key, value from char_serde_lb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lb
+POSTHOOK: Output: default@char_serde_ls
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: select * from char_serde_ls limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_ls limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_ls group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_ls group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_ls
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- Columnar
+--
+create table char_serde_c (
+ key char(15),
+ value char(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- Columnar
+--
+create table char_serde_c (
+ key char(15),
+ value char(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_c
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@char_serde_c
+PREHOOK: Output: default@char_serde_c
+POSTHOOK: query: alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@char_serde_c
+POSTHOOK: Output: default@char_serde_c
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table char_serde_c
+ select key, value from char_serde_ls
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_ls
+PREHOOK: Output: default@char_serde_c
+POSTHOOK: query: insert overwrite table char_serde_c
+ select key, value from char_serde_ls
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_ls
+POSTHOOK: Output: default@char_serde_c
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: select * from char_serde_c limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_c limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_c group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_c
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_c group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table char_serde_lbc (
+ key char(15),
+ value char(20)
+) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- LazyBinaryColumnar
+--
+create table char_serde_lbc (
+ key char(15),
+ value char(20)
+) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_lbc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@char_serde_lbc
+PREHOOK: Output: default@char_serde_lbc
+POSTHOOK: query: alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@char_serde_lbc
+POSTHOOK: Output: default@char_serde_lbc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table char_serde_lbc
+ select key, value from char_serde_c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_c
+PREHOOK: Output: default@char_serde_lbc
+POSTHOOK: query: insert overwrite table char_serde_lbc
+ select key, value from char_serde_c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_c
+POSTHOOK: Output: default@char_serde_lbc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: select * from char_serde_lbc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_lbc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lbc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: --
+-- ORC
+--
+create table char_serde_orc (
+ key char(15),
+ value char(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --
+-- ORC
+--
+create table char_serde_orc (
+ key char(15),
+ value char(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_serde_orc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@char_serde_orc
+PREHOOK: Output: default@char_serde_orc
+POSTHOOK: query: alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@char_serde_orc
+POSTHOOK: Output: default@char_serde_orc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: insert overwrite table char_serde_orc
+ select key, value from char_serde_lbc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_lbc
+PREHOOK: Output: default@char_serde_orc
+POSTHOOK: query: insert overwrite table char_serde_orc
+ select key, value from char_serde_lbc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_lbc
+POSTHOOK: Output: default@char_serde_orc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: select * from char_serde_orc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_serde_orc limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+474 val_475
+62 val_63
+468 val_469
+272 val_273
+448 val_449
+PREHOOK: query: select value, count(*) from char_serde_orc group by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select value, count(*) from char_serde_orc group by value limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_serde_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+val_0 3
+val_1 2
+val_10 1
+val_100 2
+val_101 2
+PREHOOK: query: drop table if exists char_serde_regex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_regex
+PREHOOK: Output: default@char_serde_regex
+POSTHOOK: query: drop table if exists char_serde_regex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_regex
+POSTHOOK: Output: default@char_serde_regex
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists char_serde_lb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_lb
+PREHOOK: Output: default@char_serde_lb
+POSTHOOK: query: drop table if exists char_serde_lb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_lb
+POSTHOOK: Output: default@char_serde_lb
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists char_serde_ls
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_ls
+PREHOOK: Output: default@char_serde_ls
+POSTHOOK: query: drop table if exists char_serde_ls
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_ls
+POSTHOOK: Output: default@char_serde_ls
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists char_serde_c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_c
+PREHOOK: Output: default@char_serde_c
+POSTHOOK: query: drop table if exists char_serde_c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_c
+POSTHOOK: Output: default@char_serde_c
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists char_serde_lbc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_lbc
+PREHOOK: Output: default@char_serde_lbc
+POSTHOOK: query: drop table if exists char_serde_lbc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_lbc
+POSTHOOK: Output: default@char_serde_lbc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+PREHOOK: query: drop table if exists char_serde_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_serde_orc
+PREHOOK: Output: default@char_serde_orc
+POSTHOOK: query: drop table if exists char_serde_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_serde_orc
+POSTHOOK: Output: default@char_serde_orc
+POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ]
+POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ]
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java?rev=1546475&r1=1546474&r2=1546475&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java Thu Nov 28 23:31:48 2013
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde.serdeConstants;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -191,53 +193,72 @@ public class RegexSerDe extends Abstract
try {
String t = m.group(c+1);
TypeInfo typeInfo = columnTypes.get(c);
- String typeName = typeInfo.getTypeName();
// Convert the column to the correct type when needed and set in row obj
- if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
+ PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
+ switch (pti.getPrimitiveCategory()) {
+ case STRING:
row.set(c, t);
- } else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) {
+ break;
+ case BYTE:
Byte b;
b = Byte.valueOf(t);
row.set(c,b);
- } else if (typeName.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
+ break;
+ case SHORT:
Short s;
s = Short.valueOf(t);
row.set(c,s);
- } else if (typeName.equals(serdeConstants.INT_TYPE_NAME)) {
+ break;
+ case INT:
Integer i;
i = Integer.valueOf(t);
row.set(c, i);
- } else if (typeName.equals(serdeConstants.BIGINT_TYPE_NAME)) {
+ break;
+ case LONG:
Long l;
l = Long.valueOf(t);
row.set(c, l);
- } else if (typeName.equals(serdeConstants.FLOAT_TYPE_NAME)) {
+ break;
+ case FLOAT:
Float f;
f = Float.valueOf(t);
row.set(c,f);
- } else if (typeName.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
+ break;
+ case DOUBLE:
Double d;
d = Double.valueOf(t);
row.set(c,d);
- } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
- Boolean b;
- b = Boolean.valueOf(t);
- row.set(c, b);
- } else if (typeName.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ break;
+ case BOOLEAN:
+ Boolean bool;
+ bool = Boolean.valueOf(t);
+ row.set(c, bool);
+ break;
+ case TIMESTAMP:
Timestamp ts;
ts = Timestamp.valueOf(t);
row.set(c, ts);
- } else if (typeName.equals(serdeConstants.DATE_TYPE_NAME)) {
- Date d;
- d = Date.valueOf(t);
- row.set(c, d);
- } else if (typeInfo instanceof DecimalTypeInfo) {
+ break;
+ case DATE:
+ Date date;
+ date = Date.valueOf(t);
+ row.set(c, date);
+ break;
+ case DECIMAL:
HiveDecimal bd = HiveDecimal.create(t);
row.set(c, bd);
- } else if (typeInfo instanceof VarcharTypeInfo) {
+ break;
+ case CHAR:
+ HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength());
+ row.set(c, hc);
+ break;
+ case VARCHAR:
HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo)typeInfo).getLength());
row.set(c, hv);
+ break;
+ default:
+ throw new SerDeException("Unsupported type " + typeInfo);
}
} catch (RuntimeException e) {
partialMatchedRowsCount++;