You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/03/08 01:56:47 UTC
svn commit: r1575466 - in /hive/trunk: data/files/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: xuefu
Date: Sat Mar 8 00:56:47 2014
New Revision: 1575466
URL: http://svn.apache.org/r1575466
Log:
HIVE-6414: ParquetInputFormat provides data values that do not match the object inspectors (Justin Coffey via Xuefu)
Added:
hive/trunk/data/files/parquet_types.txt
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
Added: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1575466&view=auto
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (added)
+++ hive/trunk/data/files/parquet_types.txt Sat Mar 8 00:56:47 2014
@@ -0,0 +1,21 @@
+100|1|1|1.0|0.0|abc
+101|2|2|1.1|0.3|def
+102|3|3|1.2|0.6|ghi
+103|1|4|1.3|0.9|jkl
+104|2|5|1.4|1.2|mno
+105|3|1|1.0|1.5|pqr
+106|1|2|1.1|1.8|stu
+107|2|3|1.2|2.1|vwx
+108|3|4|1.3|2.4|yza
+109|1|5|1.4|2.7|bcd
+110|2|1|1.0|3.0|efg
+111|3|2|1.1|3.3|hij
+112|1|3|1.2|3.6|klm
+113|2|4|1.3|3.9|nop
+114|3|5|1.4|4.2|qrs
+115|1|1|1.0|4.5|tuv
+116|2|2|1.1|4.8|wxy
+117|3|3|1.2|5.1|zab
+118|1|4|1.3|5.4|cde
+119|2|5|1.4|5.7|fgh
+120|3|1|1.0|6.0|ijk
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java?rev=1575466&r1=1575465&r2=1575466&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java Sat Mar 8 00:56:47 2014
@@ -35,6 +35,11 @@ public class ParquetByteInspector extend
}
@Override
+ public Object getPrimitiveJavaObject(final Object o) {
+ return o == null ? null : get(o);
+ }
+
+ @Override
public Object create(final byte val) {
return new ByteWritable(val);
}
@@ -51,6 +56,7 @@ public class ParquetByteInspector extend
if (o instanceof IntWritable) {
return (byte) ((IntWritable) o).get();
}
+
return ((ByteWritable) o).get();
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java?rev=1575466&r1=1575465&r2=1575466&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java Sat Mar 8 00:56:47 2014
@@ -35,6 +35,11 @@ public class ParquetShortInspector exten
}
@Override
+ public Object getPrimitiveJavaObject(final Object o) {
+ return o == null ? null : get(o);
+ }
+
+ @Override
public Object create(final short val) {
return new ShortWritable(val);
}
@@ -51,6 +56,7 @@ public class ParquetShortInspector exten
if (o instanceof IntWritable) {
return (short) ((IntWritable) o).get();
}
+
return ((ShortWritable) o).get();
}
}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java Sat Mar 8 00:56:47 2014
@@ -0,0 +1,55 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetByteInspector {
+
+ private ParquetByteInspector inspector;
+
+ @Before
+ public void setUp() {
+ inspector = new ParquetByteInspector();
+ }
+
+ @Test
+ public void testByteWritable() {
+ ByteWritable obj = new ByteWritable((byte) 5);
+ assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+ assertEquals((byte) 5, inspector.getPrimitiveJavaObject(obj));
+ }
+
+ @Test
+ public void testIntWritable() {
+ IntWritable obj = new IntWritable(10);
+ assertEquals(new ByteWritable((byte) 10), inspector.getPrimitiveWritableObject(obj));
+ assertEquals((byte) 10, inspector.getPrimitiveJavaObject(obj));
+ }
+
+ @Test
+ public void testNull() {
+ assertNull(inspector.getPrimitiveWritableObject(null));
+ assertNull(inspector.getPrimitiveJavaObject(null));
+ }
+
+ @Test
+ public void testCreate() {
+ assertEquals(new ByteWritable((byte) 8), inspector.create((byte) 8));
+ }
+
+ @Test
+ public void testSet() {
+ ByteWritable obj = new ByteWritable();
+ assertEquals(new ByteWritable((byte) 12), inspector.set(obj, (byte) 12));
+ }
+
+ @Test
+ public void testGet() {
+ ByteWritable obj = new ByteWritable((byte) 15);
+ assertEquals((byte) 15, inspector.get(obj));
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java Sat Mar 8 00:56:47 2014
@@ -0,0 +1,57 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetShortInspector {
+
+ private ParquetShortInspector inspector;
+
+ @Before
+ public void setUp() {
+ inspector = new ParquetShortInspector();
+ }
+
+ @Test
+ public void testShortWritable() {
+ ShortWritable obj = new ShortWritable((short) 5);
+ assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+ assertEquals((short) 5, inspector.getPrimitiveJavaObject(obj));
+ }
+
+ @Test
+ public void testIntWritable() {
+ IntWritable obj = new IntWritable(10);
+ assertEquals(new ShortWritable((short) 10), inspector.getPrimitiveWritableObject(obj));
+ assertEquals((short) 10, inspector.getPrimitiveJavaObject(obj));
+ }
+
+ @Test
+ public void testNull() {
+ assertNull(inspector.getPrimitiveWritableObject(null));
+ assertNull(inspector.getPrimitiveJavaObject(null));
+ }
+
+ @Test
+ public void testCreate() {
+ assertEquals(new ShortWritable((short) 8), inspector.create((short) 8));
+ }
+
+ @Test
+ public void testSet() {
+ ShortWritable obj = new ShortWritable();
+ assertEquals(new ShortWritable((short) 12), inspector.set(obj, (short) 12));
+ }
+
+ @Test
+ public void testGet() {
+ ShortWritable obj = new ShortWritable((short) 15);
+ assertEquals((short) 15, inspector.get(obj));
+ }
+}
Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Sat Mar 8 00:56:47 2014
@@ -0,0 +1,38 @@
+DROP TABLE parquet_types_staging;
+DROP TABLE parquet_types;
+
+CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
+
+INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging;
+
+SELECT * FROM parquet_types;
+
+SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ AVG(cfloat),
+ STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;
Added: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Sat Mar 8 00:56:47 2014
@@ -0,0 +1,142 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT * FROM parquet_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+100 1 1 1.0 0.0 abc
+101 2 2 1.1 0.3 def
+102 3 3 1.2 0.6 ghi
+103 1 4 1.3 0.9 jkl
+104 2 5 1.4 1.2 mno
+105 3 1 1.0 1.5 pqr
+106 1 2 1.1 1.8 stu
+107 2 3 1.2 2.1 vwx
+108 3 4 1.3 2.4 yza
+109 1 5 1.4 2.7 bcd
+110 2 1 1.0 3.0 efg
+111 3 2 1.1 3.3 hij
+112 1 3 1.2 3.6 klm
+113 2 4 1.3 3.9 nop
+114 3 5 1.4 4.2 qrs
+115 1 1 1.0 4.5 tuv
+116 2 2 1.1 4.8 wxy
+117 3 3 1.2 5.1 zab
+118 1 4 1.3 5.4 cde
+119 2 5 1.4 5.7 fgh
+120 3 1 1.0 6.0 ijk
+PREHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ AVG(cfloat),
+ STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ AVG(cfloat),
+ STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+1 118 1 7 1.1857142789023263 1.8000000000000003
+2 119 1 7 1.2142857142857142 1.8
+3 120 1 7 1.171428578240531 1.7999999999999996