You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/20 00:52:43 UTC
svn commit: r1494795 - in
/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql:
io/orc/CommonOrcInputFormat.java io/orc/OrcInputFormat.java
parse/BaseSemanticAnalyzer.java
Author: hashutosh
Date: Wed Jun 19 22:52:43 2013
New Revision: 1494795
URL: http://svn.apache.org/r1494795
Log:
HIVE-4754 : OrcInputFormat should be enhanced to provide vectorized input. (Jitendra Nath Pandey via Ashutosh Chauhan)
Removed:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1494795&r1=1494794&r2=1494795&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Jun 19 22:52:43 2013
@@ -18,11 +18,17 @@
package org.apache.hadoop.hive.ql.io.orc;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.NullWritable;
@@ -33,15 +39,13 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* A MapReduce/Hive input format for ORC files.
*/
public class OrcInputFormat extends FileInputFormat<NullWritable, OrcStruct>
- implements InputFormatChecker {
+ implements InputFormatChecker, VectorizedInputFormatInterface {
+
+ VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat();
private static class OrcRecordReader
implements RecordReader<NullWritable, OrcStruct> {
@@ -51,6 +55,7 @@ public class OrcInputFormat extends Fil
private final int numColumns;
private float progress = 0.0f;
+
OrcRecordReader(Reader file, Configuration conf,
long offset, long length) throws IOException {
this.reader = file.rows(offset, length,
@@ -161,6 +166,15 @@ public class OrcInputFormat extends Fil
public RecordReader<NullWritable, OrcStruct>
getRecordReader(InputSplit inputSplit, JobConf conf,
Reporter reporter) throws IOException {
+
+ boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(),
+ false);
+ if (vectorPath) {
+ RecordReader<NullWritable, VectorizedRowBatch> vorr = voif.getRecordReader(inputSplit, conf,
+ reporter);
+ return (RecordReader) vorr;
+ }
+
FileSplit fileSplit = (FileSplit) inputSplit;
Path path = fileSplit.getPath();
FileSystem fs = path.getFileSystem(conf);
@@ -173,6 +187,13 @@ public class OrcInputFormat extends Fil
public boolean validateInput(FileSystem fs, HiveConf conf,
ArrayList<FileStatus> files
) throws IOException {
+ boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(),
+ false);
+
+ if (vectorPath) {
+ return voif.validateInput(fs, conf, files);
+ }
+
if (files.size() <= 0) {
return false;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1494795&r1=1494794&r2=1494795&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Wed Jun 19 22:52:43 2013
@@ -50,7 +50,7 @@ import org.apache.hadoop.hive.ql.hooks.W
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.io.orc.CommonOrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.lib.Node;
@@ -114,7 +114,7 @@ public abstract class BaseSemanticAnalyz
.getName();
protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class
.getName();
- protected static final String ORCFILE_INPUT = CommonOrcInputFormat.class
+ protected static final String ORCFILE_INPUT = OrcInputFormat.class
.getName();
protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class
.getName();