You are viewing a plain-text version of this content; the canonical (HTML) version, including the original hyperlink, is available in the mailing-list archive.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/11/07 18:33:14 UTC
svn commit: r1637416 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Author: prasanthj
Date: Fri Nov 7 17:33:14 2014
New Revision: 1637416
URL: http://svn.apache.org/r1637416
Log:
HIVE-8778: ORC split elimination can cause NPE when column statistics is null (Prasanth J reviewed by Gopal V)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1637416&r1=1637415&r2=1637416&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Fri Nov 7 17:33:14 2014
@@ -2364,20 +2364,21 @@ class RecordReaderImpl implements Record
PredicateLeaf predicate) {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
Object minValue = getMin(cs);
+ Object maxValue = getMax(cs);
+ return evaluatePredicateRange(predicate, minValue, maxValue);
+ }
+
+ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
+ Object max) {
// if we didn't have any values, everything must have been null
- if (minValue == null) {
+ if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
return TruthValue.YES;
} else {
return TruthValue.NULL;
}
}
- Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue);
- }
- static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max) {
Location loc;
try {
// Predicate object and stats object can be one of the following base types
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1637416&r1=1637415&r2=1637416&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Fri Nov 7 17:33:14 2014
@@ -21,27 +21,6 @@ import static org.junit.Assert.assertArr
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.TreeSet;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -66,9 +45,9 @@ import org.apache.hadoop.hive.ql.io.Comb
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -104,6 +83,27 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
public class TestInputOutputFormat {
Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
@@ -1032,6 +1032,24 @@ public class TestInputOutputFormat {
reader.close();
}
+ static class SimpleRow implements Writable {
+ Text z;
+
+ public SimpleRow(Text t) {
+ this.z = t;
+ }
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ throw new UnsupportedOperationException("unsupported");
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ throw new UnsupportedOperationException("unsupported");
+ }
+ }
+
static class NestedRow implements Writable {
int z;
MyRow r;
@@ -1685,4 +1703,89 @@ public class TestInputOutputFormat {
assertEquals("cost", leaves.get(0).getColumnName());
assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
+
+ @Test
+ @SuppressWarnings("unchecked,deprecation")
+ public void testSplitElimination() throws Exception {
+ Properties properties = new Properties();
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ SerDe serde = new OrcSerde();
+ OutputFormat<?, ?> outFormat = new OrcOutputFormat();
+ conf.setInt("mapred.max.split.size", 50);
+ RecordWriter writer =
+ outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
+ Reporter.NULL);
+ writer.write(NullWritable.get(),
+ serde.serialize(new NestedRow(1,2,3), inspector));
+ writer.write(NullWritable.get(),
+ serde.serialize(new NestedRow(4,5,6), inspector));
+ writer.write(NullWritable.get(),
+ serde.serialize(new NestedRow(7,8,9), inspector));
+ writer.close(Reporter.NULL);
+ serde = new OrcSerde();
+ SearchArgument sarg =
+ SearchArgumentFactory.newBuilder()
+ .startAnd()
+ .lessThan("z", new Integer(0))
+ .end()
+ .build();
+ conf.set("sarg.pushdown", sarg.toKryo());
+ conf.set("hive.io.file.readcolumn.names", "z,r");
+ properties.setProperty("columns", "z,r");
+ properties.setProperty("columns.types", "int:struct<x:int,y:int>");
+ SerDeUtils.initializeSerDe(serde, conf, properties, null);
+ inspector = (StructObjectInspector) serde.getObjectInspector();
+ InputFormat<?,?> in = new OrcInputFormat();
+ FileInputFormat.setInputPaths(conf, testFilePath.toString());
+ InputSplit[] splits = in.getSplits(conf, 1);
+ assertEquals(0, splits.length);
+ }
+
+ @Test
+ @SuppressWarnings("unchecked,deprecation")
+ public void testSplitEliminationNullStats() throws Exception {
+ Properties properties = new Properties();
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(SimpleRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ SerDe serde = new OrcSerde();
+ OutputFormat<?, ?> outFormat = new OrcOutputFormat();
+ conf.setInt("mapred.max.split.size", 50);
+ RecordWriter writer =
+ outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
+ Reporter.NULL);
+ writer.write(NullWritable.get(),
+ serde.serialize(new SimpleRow(null), inspector));
+ writer.write(NullWritable.get(),
+ serde.serialize(new SimpleRow(null), inspector));
+ writer.write(NullWritable.get(),
+ serde.serialize(new SimpleRow(null), inspector));
+ writer.close(Reporter.NULL);
+ serde = new OrcSerde();
+ SearchArgument sarg =
+ SearchArgumentFactory.newBuilder()
+ .startAnd()
+ .lessThan("z", new String("foo"))
+ .end()
+ .build();
+ conf.set("sarg.pushdown", sarg.toKryo());
+ conf.set("hive.io.file.readcolumn.names", "z");
+ properties.setProperty("columns", "z");
+ properties.setProperty("columns.types", "string");
+ SerDeUtils.initializeSerDe(serde, conf, properties, null);
+ inspector = (StructObjectInspector) serde.getObjectInspector();
+ InputFormat<?,?> in = new OrcInputFormat();
+ FileInputFormat.setInputPaths(conf, testFilePath.toString());
+ InputSplit[] splits = in.getSplits(conf, 1);
+ assertEquals(0, splits.length);
+ }
+
}