You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/03/08 01:56:47 UTC

svn commit: r1575466 - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: xuefu
Date: Sat Mar  8 00:56:47 2014
New Revision: 1575466

URL: http://svn.apache.org/r1575466
Log:
HIVE-6414: ParquetInputFormat provides data values that do not match the object inspectors (Justin Coffey via Xuefu)

Added:
    hive/trunk/data/files/parquet_types.txt
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
    hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java

Added: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1575466&view=auto
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (added)
+++ hive/trunk/data/files/parquet_types.txt Sat Mar  8 00:56:47 2014
@@ -0,0 +1,21 @@
+100|1|1|1.0|0.0|abc
+101|2|2|1.1|0.3|def
+102|3|3|1.2|0.6|ghi
+103|1|4|1.3|0.9|jkl
+104|2|5|1.4|1.2|mno
+105|3|1|1.0|1.5|pqr
+106|1|2|1.1|1.8|stu
+107|2|3|1.2|2.1|vwx
+108|3|4|1.3|2.4|yza
+109|1|5|1.4|2.7|bcd
+110|2|1|1.0|3.0|efg
+111|3|2|1.1|3.3|hij
+112|1|3|1.2|3.6|klm
+113|2|4|1.3|3.9|nop
+114|3|5|1.4|4.2|qrs
+115|1|1|1.0|4.5|tuv
+116|2|2|1.1|4.8|wxy
+117|3|3|1.2|5.1|zab
+118|1|4|1.3|5.4|cde
+119|2|5|1.4|5.7|fgh
+120|3|1|1.0|6.0|ijk

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java?rev=1575466&r1=1575465&r2=1575466&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java Sat Mar  8 00:56:47 2014
@@ -35,6 +35,11 @@ public class ParquetByteInspector extend
   }
 
   @Override
+  public Object getPrimitiveJavaObject(final Object o) {
+    return o == null ? null : get(o);
+  }
+
+  @Override
   public Object create(final byte val) {
     return new ByteWritable(val);
   }
@@ -51,6 +56,7 @@ public class ParquetByteInspector extend
     if (o instanceof IntWritable) {
       return (byte) ((IntWritable) o).get();
     }
+
     return ((ByteWritable) o).get();
   }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java?rev=1575466&r1=1575465&r2=1575466&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java Sat Mar  8 00:56:47 2014
@@ -35,6 +35,11 @@ public class ParquetShortInspector exten
   }
 
   @Override
+  public Object getPrimitiveJavaObject(final Object o) {
+    return o == null ? null : get(o);
+  }
+
+  @Override
   public Object create(final short val) {
     return new ShortWritable(val);
   }
@@ -51,6 +56,7 @@ public class ParquetShortInspector exten
     if (o instanceof IntWritable) {
       return (short) ((IntWritable) o).get();
     }
+
     return ((ShortWritable) o).get();
   }
 }

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java Sat Mar  8 00:56:47 2014
@@ -0,0 +1,55 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetByteInspector {
+
+  private ParquetByteInspector inspector;
+
+  @Before
+  public void setUp() {
+    inspector = new ParquetByteInspector();
+  }
+
+  @Test
+  public void testByteWritable() {
+    ByteWritable obj = new ByteWritable((byte) 5);
+    assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+    assertEquals((byte) 5, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testIntWritable() {
+    IntWritable obj = new IntWritable(10);
+    assertEquals(new ByteWritable((byte) 10), inspector.getPrimitiveWritableObject(obj));
+    assertEquals((byte) 10, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testNull() {
+    assertNull(inspector.getPrimitiveWritableObject(null));
+    assertNull(inspector.getPrimitiveJavaObject(null));
+  }
+
+  @Test
+  public void testCreate() {
+    assertEquals(new ByteWritable((byte) 8), inspector.create((byte) 8));
+  }
+
+  @Test
+  public void testSet() {
+    ByteWritable obj = new ByteWritable();
+    assertEquals(new ByteWritable((byte) 12), inspector.set(obj, (byte) 12));
+  }
+
+  @Test
+  public void testGet() {
+    ByteWritable obj = new ByteWritable((byte) 15);
+    assertEquals((byte) 15, inspector.get(obj));
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java Sat Mar  8 00:56:47 2014
@@ -0,0 +1,57 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetShortInspector {
+
+  private ParquetShortInspector inspector;
+
+  @Before
+  public void setUp() {
+    inspector = new ParquetShortInspector();
+  }
+
+  @Test
+  public void testShortWritable() {
+    ShortWritable obj = new ShortWritable((short) 5);
+    assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+    assertEquals((short) 5, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testIntWritable() {
+    IntWritable obj = new IntWritable(10);
+    assertEquals(new ShortWritable((short) 10), inspector.getPrimitiveWritableObject(obj));
+    assertEquals((short) 10, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testNull() {
+    assertNull(inspector.getPrimitiveWritableObject(null));
+    assertNull(inspector.getPrimitiveJavaObject(null));
+  }
+
+  @Test
+  public void testCreate() {
+    assertEquals(new ShortWritable((short) 8), inspector.create((short) 8));
+  }
+
+  @Test
+  public void testSet() {
+    ShortWritable obj = new ShortWritable();
+    assertEquals(new ShortWritable((short) 12), inspector.set(obj, (short) 12));
+  }
+
+  @Test
+  public void testGet() {
+    ShortWritable obj = new ShortWritable((short) 15);
+    assertEquals((short) 15, inspector.get(obj));
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Sat Mar  8 00:56:47 2014
@@ -0,0 +1,38 @@
+DROP TABLE parquet_types_staging;
+DROP TABLE parquet_types;
+
+CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
+
+INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging;
+
+SELECT * FROM parquet_types;
+
+SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;

Added: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1575466&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Sat Mar  8 00:56:47 2014
@@ -0,0 +1,142 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT * FROM parquet_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+100	1	1	1.0	0.0	abc
+101	2	2	1.1	0.3	def
+102	3	3	1.2	0.6	ghi
+103	1	4	1.3	0.9	jkl
+104	2	5	1.4	1.2	mno
+105	3	1	1.0	1.5	pqr
+106	1	2	1.1	1.8	stu
+107	2	3	1.2	2.1	vwx
+108	3	4	1.3	2.4	yza
+109	1	5	1.4	2.7	bcd
+110	2	1	1.0	3.0	efg
+111	3	2	1.1	3.3	hij
+112	1	3	1.2	3.6	klm
+113	2	4	1.3	3.9	nop
+114	3	5	1.4	4.2	qrs
+115	1	1	1.0	4.5	tuv
+116	2	2	1.1	4.8	wxy
+117	3	3	1.2	5.1	zab
+118	1	4	1.3	5.4	cde
+119	2	5	1.4	5.7	fgh
+120	3	1	1.0	6.0	ijk
+PREHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+1	118	1	7	1.1857142789023263	1.8000000000000003
+2	119	1	7	1.2142857142857142	1.8
+3	120	1	7	1.171428578240531	1.7999999999999996