Posted to commits@hive.apache.org by gu...@apache.org on 2013/10/01 06:48:48 UTC
svn commit: r1527883 [4/6] - in /hive/branches/tez: ./
ant/src/org/apache/hadoop/hive/ant/
beeline/src/java/org/apache/hive/beeline/ bin/
common/src/java/org/apache/hadoop/hive/conf/ conf/
contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Tue Oct 1 04:48:44 2013
@@ -17,10 +17,9 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.FSRecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -45,7 +44,7 @@ public class OrcOutputFormat extends Fil
private static class OrcRecordWriter
implements RecordWriter<NullWritable, OrcSerdeRow>,
- FileSinkOperator.RecordWriter {
+ FSRecordWriter {
private Writer writer = null;
private final Path path;
private final OrcFile.WriterOptions options;
@@ -105,7 +104,7 @@ public class OrcOutputFormat extends Fil
}
@Override
- public FileSinkOperator.RecordWriter
+ public FSRecordWriter
getHiveRecordWriter(JobConf conf,
Path path,
Class<? extends Writable> valueClass,
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java Tue Oct 1 04:48:44 2013
@@ -17,9 +17,16 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedSerde;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -29,26 +36,22 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Properties;
-
/**
* A serde class for ORC.
* It transparently passes the object to/from the ORC file reader/writer.
*/
-public class OrcSerde implements SerDe {
+public class OrcSerde implements SerDe, VectorizedSerde {
private static final Log LOG = LogFactory.getLog(OrcSerde.class);
private final OrcSerdeRow row = new OrcSerdeRow();
private ObjectInspector inspector = null;
+ private VectorizedOrcSerde vos = null;
+
final class OrcSerdeRow implements Writable {
- private Object realRow;
- private ObjectInspector inspector;
+ Object realRow;
+ ObjectInspector inspector;
@Override
public void write(DataOutput dataOutput) throws IOException {
@@ -79,7 +82,7 @@ public class OrcSerde implements SerDe {
// Parse the configuration parameters
ArrayList<String> columnNames = new ArrayList<String>();
if (columnNameProperty != null && columnNameProperty.length() > 0) {
- for(String name: columnNameProperty.split(",")) {
+ for (String name : columnNameProperty.split(",")) {
columnNames.add(name);
}
}
@@ -96,7 +99,7 @@ public class OrcSerde implements SerDe {
}
ArrayList<TypeInfo> fieldTypes =
- TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
StructTypeInfo rootType = new StructTypeInfo();
rootType.setAllStructFieldNames(columnNames);
rootType.setAllStructFieldTypeInfos(fieldTypes);
@@ -128,6 +131,7 @@ public class OrcSerde implements SerDe {
/**
* Always returns null, since serialized size doesn't make sense in the
* context of ORC files.
+ *
* @return null
*/
@Override
@@ -135,4 +139,18 @@ public class OrcSerde implements SerDe {
return null;
}
+ @Override
+ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector)
+ throws SerDeException {
+ if (vos == null) {
+ vos = new VectorizedOrcSerde(objInspector);
+ }
+ return vos.serialize(vrg, objInspector);
+ }
+
+ @Override
+ public void deserializeVector(Object rowBlob, int rowsInBatch, VectorizedRowBatch reuseBatch)
+ throws SerDeException {
+ // nothing to do here
+ }
}
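A note on the new vectorized path: OrcSerde now routes batch writes through a lazily constructed VectorizedOrcSerde, while deserializeVector stays a no-op because ORC reads batches directly through its own RecordReader. A minimal sketch of the intended call path, assuming an initialized serde, a populated VectorizedRowBatch vrg, and a matching ObjectInspector inspector (all placeholders):

    OrcSerde serde = new OrcSerde();
    // serde.initialize(conf, tableProperties) is assumed to have run first.
    Writable serialized = serde.serializeVector(vrg, inspector);
    // The first call builds the VectorizedOrcSerde; later calls reuse it.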
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java Tue Oct 1 04:48:44 2013
@@ -39,6 +39,19 @@ public interface Reader {
long getNumberOfRows();
/**
+ * Get the deserialized data size of the file
+ * @return raw data size
+ */
+ long getRawDataSize();
+
+ /**
+ * Get the deserialized data size of the specified columns
+   * @param colNames the list of column names
+ * @return raw data size of columns
+ */
+ long getRawDataSizeOfColumns(List<String> colNames);
+
+ /**
* Get the user metadata keys.
* @return the set of metadata keys
*/
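The two accessors added above expose deserialized ("raw") sizes for size estimation. A hedged usage sketch, assuming the OrcFile.createReader(fs, path) factory of this era (note that ReaderImpl below still stubs both methods to return 0 in this revision):

    Reader orcReader = OrcFile.createReader(fs, path);
    long fileRawSize = orcReader.getRawDataSize();
    long colRawSize = orcReader.getRawDataSizeOfColumns(
        java.util.Arrays.asList("col1", "col2"));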
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Oct 1 04:48:44 2013
@@ -343,4 +343,14 @@ final class ReaderImpl implements Reader
include, footer.getRowIndexStride(), sarg, columnNames);
}
+ @Override
+ public long getRawDataSize() {
+ return 0;
+ }
+
+ @Override
+ public long getRawDataSizeOfColumns(List<String> colNames) {
+ return 0;
+ }
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java Tue Oct 1 04:48:44 2013
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
/**
* A row-by-row iterator for ORC files.
*/
@@ -39,6 +41,16 @@ public interface RecordReader {
Object next(Object previous) throws IOException;
/**
+ * Read the next row batch. The size of the batch to read cannot be controlled
* by the callers. Callers need to look at VectorizedRowBatch.size of the returned
+ * object to know the batch size read.
+ * @param previousBatch a row batch object that can be reused by the reader
+ * @return the row batch that was read
+ * @throws java.io.IOException
+ */
+ VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch) throws IOException;
+
+ /**
* Get the row number of the row that will be returned by the following
* call to next().
* @return the row number from 0 to the number of rows in the file
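Because the caller cannot pick the batch size, the canonical read loop reuses one batch object and consults its size field each iteration. A minimal sketch, assuming the existing Reader.rows(...) factory and RecordReader.hasNext():

    RecordReader rows = orcReader.rows(null); // null: include all columns
    VectorizedRowBatch batch = null;
    while (rows.hasNext()) {
      batch = rows.nextBatch(batch); // the reader decides the size
      for (int r = 0; r < batch.size; r++) {
        // consume row r (consumer code assumed)
      }
    }
    rows.close();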
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Oct 1 04:48:44 2013
@@ -27,15 +27,17 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -234,6 +236,38 @@ class RecordReaderImpl implements Record
}
return previous;
}
+ /**
+ * Populates the isNull vector array in the previousVector object based on
+ * the present stream values. This function is called from all the child
+   * readers, which then set their values based on the isNull entries.
+   * @param previousVector The ColumnVector object whose isNull array is populated
+   * @param batchSize Size of the column vector
+   * @return the updated previousVector object
+ * @throws IOException
+ */
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+
+ ColumnVector result = (ColumnVector) previousVector;
+ if (present != null) {
+ // Set noNulls and isNull vector of the ColumnVector based on
+ // present stream
+ result.noNulls = true;
+ for (int i = 0; i < batchSize; i++) {
+ result.isNull[i] = (present.next() != 1);
+ if (result.noNulls && result.isNull[i]) {
+ result.noNulls = false;
+ }
+ }
+ } else {
+      // There is no present stream, which means that all the values are
+ // present.
+ result.noNulls = true;
+ for (int i = 0; i < batchSize; i++) {
+ result.isNull[i] = false;
+ }
+ }
+ return previousVector;
+ }
}
private static class BooleanTreeReader extends TreeReader{
@@ -277,6 +311,23 @@ class RecordReaderImpl implements Record
}
return result;
}
+
+ @Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
}
private static class ByteTreeReader extends TreeReader{
@@ -317,6 +368,23 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
@@ -370,6 +438,23 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
@@ -423,6 +508,23 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
@@ -476,6 +578,23 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
@@ -520,6 +639,39 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ DoubleColumnVector result = null;
+ if (previousVector == null) {
+ result = new DoubleColumnVector();
+ } else {
+ result = (DoubleColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
+ result.vector[i] = SerializationUtils.readFloat(stream);
+ } else {
+
+ // If the value is not present then set NaN
+ result.vector[i] = Double.NaN;
+ }
+ }
+
+ // Set isRepeating flag
+ result.isRepeating = true;
+ for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
+ if (result.vector[i] != result.vector[i + 1]) {
+ result.isRepeating = false;
+ }
+ }
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
items = countNonNulls(items);
for(int i=0; i < items; ++i) {
@@ -568,6 +720,38 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ DoubleColumnVector result = null;
+ if (previousVector == null) {
+ result = new DoubleColumnVector();
+ } else {
+ result = (DoubleColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
+ result.vector[i] = SerializationUtils.readDouble(stream);
+ } else {
+ // If the value is not present then set NaN
+ result.vector[i] = Double.NaN;
+ }
+ }
+
+ // Set isRepeating flag
+ result.isRepeating = true;
+ for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
+ if (result.vector[i] != result.vector[i + 1]) {
+ result.isRepeating = false;
+ }
+ }
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
items = countNonNulls(items);
stream.skip(items * 8);
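A subtlety in the isRepeating scan above: nulls are materialized as Double.NaN, and NaN != NaN evaluates to true in Java, so any null in the batch (including an all-null batch) clears isRepeating. That is still correct, since the isNull entries set by super.nextVector() continue to mark which values are valid.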
@@ -636,6 +820,12 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextBatch is not supported operation for Binary type");
+ }
+
+ @Override
void skipRows(long items) throws IOException {
items = countNonNulls(items);
long lengthToSkip = 0;
@@ -649,6 +839,7 @@ class RecordReaderImpl implements Record
private static class TimestampTreeReader extends TreeReader{
private IntegerReader data = null;
private IntegerReader nanos = null;
+ private final LongColumnVector nanoVector = new LongColumnVector();
TimestampTreeReader(Path path, int columnId) {
super(path, columnId);
@@ -708,6 +899,53 @@ class RecordReaderImpl implements Record
return result;
}
+ @Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ data.nextVector(result, batchSize);
+ nanoVector.isNull = result.isNull;
+ nanos.nextVector(nanoVector, batchSize);
+
+ if(result.isRepeating && nanoVector.isRepeating) {
+ batchSize = 1;
+ }
+
+      // Non-repeating values present in the vector. Iterate through the vector and populate the time
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
+ long ms = (result.vector[result.isRepeating ? 0 : i] + WriterImpl.BASE_TIMESTAMP)
+ * WriterImpl.MILLIS_PER_SECOND;
+ long ns = parseNanos(nanoVector.vector[nanoVector.isRepeating ? 0 : i]);
+          // the rounding error exists because Java integer division truncates toward zero
+ // -42001/1000 = -42; and -42001 % 1000 = -1 (+ 1000)
+ // to get the correct value we need
+ // (-42 - 1)*1000 + 999 = -42001
+ // (42)*1000 + 1 = 42001
+ if(ms < 0 && ns != 0) {
+ ms -= 1000;
+ }
+ // Convert millis into nanos and add the nano vector value to it
+ result.vector[i] = (ms * 1000000) + ns;
+ }
+ }
+
+ if(!(result.isRepeating && nanoVector.isRepeating)) {
+ // both have to repeat for the result to be repeating
+ result.isRepeating = false;
+ }
+
+ return result;
+ }
+
private static int parseNanos(long serialized) {
int zeros = 7 & (int) serialized;
int result = (int) serialized >>> 3;
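A worked example of the sign correction, ignoring the BASE_TIMESTAMP offset: the instant -42.001s is stored as seconds = -42 (integer division truncates toward zero) plus a positive sub-second remainder of 999,000,000 ns. Reading back, ms = -42 * 1000 = -42000; since ms < 0 and ns != 0, ms becomes -43000, and the result is -43000 * 1,000,000 + 999,000,000 = -42,001,000,000 ns, i.e. -42.001s. Without the correction the result would be -41,001,000,000 ns, a full second off.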
@@ -826,6 +1064,12 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for Decimal type");
+ }
+
+ @Override
void skipRows(long items) throws IOException {
items = countNonNulls(items);
for(int i=0; i < items; i++) {
@@ -885,6 +1129,11 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ return reader.nextVector(previousVector, batchSize);
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skipRows(items);
}
@@ -898,8 +1147,11 @@ class RecordReaderImpl implements Record
private InStream stream;
private IntegerReader lengths;
+ private final LongColumnVector scratchlcv;
+
StringDirectTreeReader(Path path, int columnId) {
super(path, columnId);
+ scratchlcv = new LongColumnVector();
}
@Override
@@ -958,6 +1210,72 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ BytesColumnVector result = null;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read lengths
+ scratchlcv.isNull = result.isNull;
+ lengths.nextVector(scratchlcv, batchSize);
+ int totalLength = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ totalLength += (int) scratchlcv.vector[i];
+ }
+ }
+ } else {
+ if (!scratchlcv.isNull[0]) {
+ totalLength = (int) (batchSize * scratchlcv.vector[0]);
+ }
+ }
+
+      // Read all the strings for this batch
+ byte[] allBytes = new byte[totalLength];
+ int offset = 0;
+ int len = totalLength;
+ while (len > 0) {
+ int bytesRead = stream.read(allBytes, offset, len);
+ if (bytesRead < 0) {
+ throw new EOFException("Can't finish byte read from " + stream);
+ }
+ len -= bytesRead;
+ offset += bytesRead;
+ }
+
+ // Too expensive to figure out 'repeating' by comparisons.
+ result.isRepeating = false;
+ offset = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+ offset += scratchlcv.vector[i];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+ offset += scratchlcv.vector[0];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ }
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
items = countNonNulls(items);
long lengthToSkip = 0;
@@ -977,8 +1295,12 @@ class RecordReaderImpl implements Record
private int[] dictionaryOffsets;
private IntegerReader reader;
+ private byte[] dictionaryBufferInBytesCache = null;
+ private final LongColumnVector scratchlcv;
+
StringDictionaryTreeReader(Path path, int columnId) {
super(path, columnId);
+ scratchlcv = new LongColumnVector();
}
@Override
@@ -1004,6 +1326,8 @@ class RecordReaderImpl implements Record
if (in.available() > 0) {
dictionaryBuffer = new DynamicByteArray(64, in.available());
dictionaryBuffer.readAll(in);
+        // Since it's the start of a stripe, invalidate the cache.
+ dictionaryBufferInBytesCache = null;
} else {
dictionaryBuffer = null;
}
@@ -1050,14 +1374,7 @@ class RecordReaderImpl implements Record
result = (Text) previous;
}
int offset = dictionaryOffsets[entry];
- int length;
- // if it isn't the last entry, subtract the offsets otherwise use
- // the buffer length.
- if (entry < dictionaryOffsets.length - 1) {
- length = dictionaryOffsets[entry + 1] - offset;
- } else {
- length = dictionaryBuffer.size() - offset;
- }
+ int length = getDictionaryEntryLength(entry, offset);
// If the column is just empty strings, the size will be zero,
// so the buffer will be null, in that case just return result
// as it will default to empty
@@ -1071,6 +1388,74 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ BytesColumnVector result = null;
+ int offset = 0, length = 0;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ if (dictionaryBuffer != null) {
+
+ // Load dictionaryBuffer into cache.
+ if (dictionaryBufferInBytesCache == null) {
+ dictionaryBufferInBytesCache = dictionaryBuffer.get();
+ }
+
+ // Read string offsets
+ scratchlcv.isNull = result.isNull;
+ reader.nextVector(scratchlcv, batchSize);
+ if (!scratchlcv.isRepeating) {
+
+          // The vector has non-repeating strings. Iterate through the batch
+ // and set strings one by one
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
+ length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
+ result.setRef(i, dictionaryBufferInBytesCache, offset, length);
+ } else {
+ // If the value is null then set offset and length to zero (null string)
+ result.setRef(i, dictionaryBufferInBytesCache, 0, 0);
+ }
+ }
+ } else {
+ // If the value is repeating then just set the first value in the
+          // vector and set the isRepeating flag to true. No need to iterate through and
+ // set all the elements to the same value
+ offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
+ length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
+ result.setRef(0, dictionaryBufferInBytesCache, offset, length);
+ }
+ result.isRepeating = scratchlcv.isRepeating;
+ } else {
+ // Entire stripe contains null strings.
+ result.isRepeating = true;
+ result.noNulls = false;
+ result.isNull[0] = true;
+ result.setRef(0, "".getBytes(), 0, 0);
+ }
+ return result;
+ }
+
+ int getDictionaryEntryLength(int entry, int offset) {
+ int length = 0;
+ // if it isn't the last entry, subtract the offsets otherwise use
+ // the buffer length.
+ if (entry < dictionaryOffsets.length - 1) {
+ length = dictionaryOffsets[entry + 1] - offset;
+ } else {
+ length = dictionaryBuffer.size() - offset;
+ }
+ return length;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
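Both string readers avoid per-row allocation by pointing each BytesColumnVector entry at a shared byte[] via setRef. A worked example for the direct reader: lengths [3, null, 2] give totalLength = 5; one bulk read fills allBytes, then setRef(0, allBytes, 0, 3), setRef(1, allBytes, 0, 0) for the null, and setRef(2, allBytes, 3, 2). The dictionary reader does the same against the cached dictionary bytes, and when the offsets repeat it sets only element 0 and flags the vector as repeating.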
@@ -1162,6 +1547,28 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ ColumnVector[] result = null;
+ if (previousVector == null) {
+ result = new ColumnVector[fields.length];
+ } else {
+ result = (ColumnVector[]) previousVector;
+ }
+
+ // Read all the members of struct as column vectors
+ for (int i = 0; i < fields.length; i++) {
+ if (fields[i] != null) {
+ if (result[i] == null) {
+ result[i] = (ColumnVector) fields[i].nextVector(null, batchSize);
+ } else {
+ fields[i].nextVector(result[i], batchSize);
+ }
+ }
+ }
+ return result;
+ }
+
+ @Override
void startStripe(Map<StreamName, InStream> streams,
List<OrcProto.ColumnEncoding> encodings
) throws IOException {
@@ -1231,6 +1638,12 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for Union type");
+ }
+
+ @Override
void startStripe(Map<StreamName, InStream> streams,
List<OrcProto.ColumnEncoding> encodings
) throws IOException {
@@ -1308,6 +1721,11 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previous, long batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for List type");
+ }
+
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
(encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
@@ -1396,6 +1814,11 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previous, long batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for Map type");
+ }
+
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
(encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
@@ -2196,6 +2619,31 @@ class RecordReaderImpl implements Record
}
@Override
+ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException {
+ VectorizedRowBatch result = null;
+ if (rowInStripe >= rowCountInStripe) {
+ currentStripe += 1;
+ readStripe();
+ }
+
+ long batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (rowCountInStripe - rowInStripe));
+ rowInStripe += batchSize;
+ if (previous == null) {
+ ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize);
+ result = new VectorizedRowBatch(cols.length);
+ result.cols = cols;
+ } else {
+ result = (VectorizedRowBatch) previous;
+ result.selectedInUse = false;
+ reader.nextVector(result.cols, (int) batchSize);
+ }
+
+ result.size = (int) batchSize;
+ advanceToNextRow(rowInStripe + rowBaseInStripe);
+ return result;
+ }
+
+ @Override
public void close() throws IOException {
file.close();
}
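Note that nextBatch never crosses a stripe boundary: batchSize = min(VectorizedRowBatch.DEFAULT_SIZE, rows left in the stripe). Assuming DEFAULT_SIZE is 1024, a 2500-row stripe comes back as batches of 1024, 1024 and 452 rows, and the next call advances to the following stripe. Passing the previous batch back in reuses its column vectors; passing null makes the tree readers allocate fresh ones.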
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Tue Oct 1 04:48:44 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
/**
* A reader that reads a sequence of bytes. A control byte is read before
* each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
@@ -82,6 +84,29 @@ class RunLengthByteReader {
return result;
}
+ void nextVector(LongColumnVector previous, long previousLen)
+ throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (!previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int types in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+
+ // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+ // when determining the isRepeating flag.
+ if (previous.isRepeating
+ && i > 0
+ && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
+ previous.isRepeating = false;
+ }
+ }
+ }
+
void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
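Because nulls are materialized as 1 in integer vectors, value equality alone cannot prove a run is repeating. Worked example: values [1, null, 1] produce vector [1, 1, 1], but isNull = [false, true, false] differs between neighbors, so isRepeating is correctly cleared. The same comparison appears in RunLengthIntegerReader and RunLengthIntegerReaderV2 below.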
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Tue Oct 1 04:48:44 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
/**
* A reader that reads a sequence of integers.
* */
@@ -91,6 +93,30 @@ class RunLengthIntegerReader implements
}
@Override
+ public void nextVector(LongColumnVector previous, long previousLen)
+ throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (!previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int type in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+
+ // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+ // when determining the isRepeating flag.
+ if (previous.isRepeating
+ && i > 0
+ && (previous.vector[i - 1] != previous.vector[i] || previous.isNull[i - 1] != previous.isNull[i])) {
+ previous.isRepeating = false;
+ }
+ }
+ }
+
+ @Override
public void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java Tue Oct 1 04:48:44 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
/**
@@ -322,4 +323,28 @@ class RunLengthIntegerReaderV2 implement
numValues -= consume;
}
}
+
+ @Override
+ public void nextVector(LongColumnVector previous, long previousLen) throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (!previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int type in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+
+ // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+ // when determining the isRepeating flag.
+ if (previous.isRepeating
+ && i > 0
+ && (previous.vector[i - 1] != previous.vector[i] ||
+ previous.isNull[i - 1] != previous.isNull[i])) {
+ previous.isRepeating = false;
+ }
+ }
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java Tue Oct 1 04:48:44 2013
@@ -47,4 +47,22 @@ public interface Writer {
* @throws IOException
*/
void close() throws IOException;
+
+ /**
+   * Return the deserialized data size. The raw data size will be computed when
+   * writing the file footer. Hence the raw data size value will be available only
+ * after closing the writer.
+ *
+ * @return raw data size
+ */
+ long getRawDataSize();
+
+ /**
+   * Return the number of rows in the file. The row count gets updated when flushing
+   * the stripes. To get an accurate row count, this method should be called after
+ * closing the writer.
+ *
+ * @return row count
+ */
+ long getNumberOfRows();
}
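Both accessors are only meaningful once close() has flushed the footer, so the intended call order is write, close, then read the counters. A hedged sketch (writer construction elided; WriterImpl below still stubs both methods to 0 in this revision):

    // writer obtained from OrcFile.createWriter(...) elsewhere.
    writer.addRow(row);
    writer.close(); // footer written; counters finalized
    long rawSize = writer.getRawDataSize();
    long numRows = writer.getNumberOfRows();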
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Oct 1 04:48:44 2013
@@ -1871,4 +1871,14 @@ class WriterImpl implements Writer, Memo
rawWriter.close();
}
}
+
+ @Override
+ public long getRawDataSize() {
+ return 0;
+ }
+
+ @Override
+ public long getNumberOfRows() {
+ return 0;
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Tue Oct 1 04:48:44 2013
@@ -378,7 +378,8 @@ public final class CorrelationUtilities
// copies desc of cGBYm to cGBYr and remove cGBYm and cRS
GroupByOperator cGBYm = (GroupByOperator) parent;
- cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
+ cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
+ .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Tue Oct 1 04:48:44 2013
@@ -77,6 +77,13 @@ public class PhysicalOptimizer {
if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT)) {
resolvers.add(new BucketingSortingInferenceOptimizer());
}
+
+ // Vectorization should be the last optimization, because it doesn't modify the plan
+    // or any operators. It makes a very low-level transformation to the expressions to
+    // run in vectorized mode.
+ if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+ resolvers.add(new Vectorizer());
+ }
}
/**
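The resolver is gated on HIVE_VECTORIZATION_ENABLED. A minimal sketch of enabling it programmatically, assuming the standard HiveConf accessors:

    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    // The physical optimizer now appends the Vectorizer as its final resolver.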
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Tue Oct 1 04:48:44 2013
@@ -29,6 +29,7 @@ import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -59,10 +61,15 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
@@ -304,6 +311,13 @@ public abstract class BaseSemanticAnalyz
rootTasks = new ArrayList<Task<? extends Serializable>>();
}
+ public static String stripIdentifierQuotes(String val) {
+ if ((val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`')) {
+ val = val.substring(1, val.length() - 1);
+ }
+ return val;
+ }
+
public static String stripQuotes(String val) {
return PlanUtils.stripQuotes(val);
}
@@ -580,7 +594,7 @@ public abstract class BaseSemanticAnalyz
// child 2 is the optional comment of the column
if (child.getChildCount() == 3) {
col.setComment(unescapeSQLString(child.getChild(2).getText()));
- }
+ }
}
colList.add(col);
}
@@ -748,7 +762,7 @@ public abstract class BaseSemanticAnalyz
}
// check if the columns specified in the partition() clause are actually partition columns
- Utilities.validatePartSpec(tableHandle, partSpec);
+ validatePartSpec(tableHandle, partSpec, ast, conf);
// check if the partition spec is valid
if (numDynParts > 0) {
@@ -1115,4 +1129,79 @@ public abstract class BaseSemanticAnalyz
return storedAsDirs;
}
+ private static void getPartExprNodeDesc(ASTNode astNode,
+ Map<ASTNode, ExprNodeDesc> astExprNodeMap)
+ throws SemanticException, HiveException {
+
+ if ((astNode == null) || (astNode.getChildren() == null) ||
+ (astNode.getChildren().size() <= 1)) {
+ return;
+ }
+
+ TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
+ for (Node childNode : astNode.getChildren()) {
+ ASTNode childASTNode = (ASTNode)childNode;
+
+ if (childASTNode.getType() != HiveParser.TOK_PARTVAL) {
+ getPartExprNodeDesc(childASTNode, astExprNodeMap);
+ } else {
+ if (childASTNode.getChildren().size() <= 1) {
+ throw new HiveException("This is dynamic partitioning");
+ }
+
+ ASTNode partValASTChild = (ASTNode)childASTNode.getChildren().get(1);
+ astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0),
+ TypeCheckProcFactory.genExprNode(partValASTChild, typeCheckCtx).get(partValASTChild));
+ }
+ }
+ }
+
+ public static void validatePartSpec(Table tbl,
+ Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
+
+ Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
+
+ Utilities.validatePartSpec(tbl, partSpec);
+
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+ try {
+ getPartExprNodeDesc(astNode, astExprNodeMap);
+ } catch (HiveException e) {
+ return;
+ }
+ List<FieldSchema> parts = tbl.getPartitionKeys();
+ Map<String, String> partCols = new HashMap<String, String>(parts.size());
+ for (FieldSchema col : parts) {
+ partCols.put(col.getName(), col.getType().toLowerCase());
+ }
+ for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+
+ String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+ if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+ astKeyName = stripIdentifierQuotes(astKeyName);
+ }
+ String colType = partCols.get(astKeyName);
+ ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+ TypeInfo expectedType =
+ TypeInfoUtils.getTypeInfoFromTypeString(colType);
+ ObjectInspector outputOI =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+ Object value = null;
+ try {
+ value =
+ ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+ evaluate(partSpec.get(astKeyName));
+ } catch (HiveException e) {
+ throw new SemanticException(e);
+ }
+ Object convertedValue =
+ ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+ if (convertedValue == null) {
+ throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+ inputOI.getTypeName(), outputOI.getTypeName()));
+ }
+ }
+ }
+ }
}
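The practical effect of the new validatePartSpec: with HIVE_TYPE_CHECK_ON_INSERT enabled, a partition spec whose literal cannot be converted to the declared partition column type (for example, a non-numeric string supplied for an int partition key) now fails at analysis time with PARTITION_SPEC_TYPE_MISMATCH instead of silently creating a badly typed partition.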
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Tue Oct 1 04:48:44 2013
@@ -522,6 +522,7 @@ public class ColumnStatsSemanticAnalyzer
boolean isPartitionStats = isPartitionLevelStats(tree);
PartitionList partList = null;
checkForPartitionColumns(colNames, getPartitionKeys(tableName));
+ validateSpecifiedColumnNames(tableName, colNames);
if (isPartitionStats) {
isTableLevel = false;
@@ -545,6 +546,25 @@ public class ColumnStatsSemanticAnalyzer
}
}
+ // fail early if the columns specified for column statistics are not valid
+ private void validateSpecifiedColumnNames(String tableName, List<String> specifiedCols)
+ throws SemanticException {
+ List<FieldSchema> fields = null;
+ try {
+ fields = db.getTable(tableName).getAllCols();
+ } catch (HiveException e) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+ }
+ List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(fields);
+
+ for(String sc : specifiedCols) {
+ if (!tableCols.contains(sc.toLowerCase())) {
+ String msg = "'" + sc + "' (possible columns are " + tableCols.toString() + ")";
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg));
+ }
+ }
+ }
+
private List<String> getPartitionKeys(String tableName) throws SemanticException {
List<FieldSchema> fields;
try {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Tue Oct 1 04:48:44 2013
@@ -2613,6 +2613,7 @@ public class DDLSemanticAnalyzer extends
currentLocation = null;
}
currentPart = getPartSpec(child);
+ validatePartSpec(tab, currentPart, (ASTNode)child, conf);
break;
case HiveParser.TOK_PARTITIONLOCATION:
// if location specified, set in partition
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Tue Oct 1 04:48:44 2013
@@ -1097,49 +1097,6 @@ public class PTFTranslator {
return componentInvocations;
}
-
- /*
- * Lead Lag functionality
- */
- public static class LeadLagInfo {
- /*
- * list of LL invocations in a Query.
- */
- List<ExprNodeGenericFuncDesc> leadLagExprs;
- /*
- * map from the Select Expr Node to the LL Function invocations in it.
- */
- Map<ExprNodeDesc, List<ExprNodeGenericFuncDesc>> mapTopExprToLLFunExprs;
-
- private void addLeadLagExpr(ExprNodeGenericFuncDesc llFunc) {
- leadLagExprs = leadLagExprs == null ? new ArrayList<ExprNodeGenericFuncDesc>() : leadLagExprs;
- leadLagExprs.add(llFunc);
- }
-
- public List<ExprNodeGenericFuncDesc> getLeadLagExprs() {
- return leadLagExprs;
- }
-
- public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
- addLeadLagExpr(llFuncExpr);
- mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
- new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
- List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
- if (funcList == null) {
- funcList = new ArrayList<ExprNodeGenericFuncDesc>();
- mapTopExprToLLFunExprs.put(topExpr, funcList);
- }
- funcList.add(llFuncExpr);
- }
-
- public List<ExprNodeGenericFuncDesc> getLLFuncExprsInTopExpr(ExprNodeDesc topExpr) {
- if (mapTopExprToLLFunExprs == null) {
- return null;
- }
- return mapTopExprToLLFunExprs.get(topExpr);
- }
- }
-
public static void validateNoLeadLagInValueBoundarySpec(ASTNode node)
throws SemanticException {
String errMsg = "Lead/Lag not allowed in ValueBoundary Spec";
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Tue Oct 1 04:48:44 2013
@@ -110,9 +110,14 @@ public final class TypeCheckProcFactory
// build the exprNodeFuncDesc with recursively built children.
ASTNode expr = (ASTNode) nd;
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
+
RowResolver input = ctx.getInputRR();
ExprNodeDesc desc = null;
+ if ((ctx == null) || (input == null)) {
+ return null;
+ }
+
// If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr);
if (colInfo != null) {
@@ -898,7 +903,7 @@ public final class TypeCheckProcFactory
}
}
- desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, children);
+ desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, children);
}
// UDFOPPositive is a no-op.
// However, we still create it, and then remove it here, to make sure we
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingExprNodeEvaluatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingExprNodeEvaluatorFactory.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingExprNodeEvaluatorFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingExprNodeEvaluatorFactory.java Tue Oct 1 04:48:44 2013
@@ -23,7 +23,6 @@ import java.util.List;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.PTFTranslator.LeadLagInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java Tue Oct 1 04:48:44 2013
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.ql.plan;
public class AbstractOperatorDesc implements OperatorDesc {
protected transient Statistics statistics;
+ private boolean vectorMode = false;
+
@Override
@Explain(displayName = "Statistics", normalExplain = false)
public Statistics getStatistics() {
@@ -36,4 +38,13 @@ public class AbstractOperatorDesc implem
public Object clone() throws CloneNotSupportedException {
throw new CloneNotSupportedException("clone not supported");
}
+
+ @Explain(displayName = "Vectorized execution", displayOnlyOnTrue = true)
+ public boolean getVectorModeOn() {
+ return vectorMode;
+ }
+
+ public void setVectorMode(boolean vm) {
+ this.vectorMode = vm;
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java Tue Oct 1 04:48:44 2013
@@ -31,7 +31,6 @@ import org.apache.hadoop.hive.common.Jav
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -53,10 +52,10 @@ public class CreateTableDesc extends DDL
String databaseName;
String tableName;
boolean isExternal;
- ArrayList<FieldSchema> cols;
- ArrayList<FieldSchema> partCols;
- ArrayList<String> bucketCols;
- ArrayList<Order> sortCols;
+ List<FieldSchema> cols;
+ List<FieldSchema> partCols;
+ List<String> bucketCols;
+ List<Order> sortCols;
int numBuckets;
String fieldDelim;
String fieldEscape;
@@ -130,8 +129,12 @@ public class CreateTableDesc extends DDL
this.serdeProps = serdeProps;
this.tblProps = tblProps;
this.ifNotExists = ifNotExists;
- this.skewedColNames = new ArrayList<String>(skewedColNames);
- this.skewedColValues = new ArrayList<List<String>>(skewedColValues);
+ this.skewedColNames = copyList(skewedColNames);
+ this.skewedColValues = copyList(skewedColValues);
+ }
+
+ private static <T> List<T> copyList(List<T> copy) {
+ return copy == null ? null : new ArrayList<T>(copy);
}
@Explain(displayName = "columns")
@@ -166,7 +169,7 @@ public class CreateTableDesc extends DDL
this.tableName = tableName;
}
- public ArrayList<FieldSchema> getCols() {
+ public List<FieldSchema> getCols() {
return cols;
}
@@ -174,7 +177,7 @@ public class CreateTableDesc extends DDL
this.cols = cols;
}
- public ArrayList<FieldSchema> getPartCols() {
+ public List<FieldSchema> getPartCols() {
return partCols;
}
@@ -183,7 +186,7 @@ public class CreateTableDesc extends DDL
}
@Explain(displayName = "bucket columns")
- public ArrayList<String> getBucketCols() {
+ public List<String> getBucketCols() {
return bucketCols;
}
@@ -303,7 +306,7 @@ public class CreateTableDesc extends DDL
* @return the sortCols
*/
@Explain(displayName = "sort columns")
- public ArrayList<Order> getSortCols() {
+ public List<Order> getSortCols() {
return sortCols;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Tue Oct 1 04:48:44 2013
@@ -62,6 +62,7 @@ public class ExprNodeGenericFuncDesc ext
*/
private GenericUDF genericUDF;
private List<ExprNodeDesc> childExprs;
+ private transient String funcText;
/**
* This class uses a writableObjectInspector rather than a TypeInfo to store
* the canonical type information for this NodeDesc.
@@ -73,13 +74,19 @@ public class ExprNodeGenericFuncDesc ext
public ExprNodeGenericFuncDesc() {
}
+ /* If the function has an explicit name like func(args) then call a
+ * constructor that explicitly provides the function name in the
+ * funcText argument.
+ */
public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+ String funcText,
List<ExprNodeDesc> children) {
this(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
- genericUDF, children);
+ genericUDF, funcText, children);
}
public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+ String funcText,
List<ExprNodeDesc> children) {
super(TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
this.writableObjectInspector =
@@ -87,6 +94,18 @@ public class ExprNodeGenericFuncDesc ext
assert (genericUDF != null);
this.genericUDF = genericUDF;
this.childExprs = children;
+ this.funcText = funcText;
+ }
+
+ // Backward-compatibility interfaces for functions without a user-visible name.
+ public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+ List<ExprNodeDesc> children) {
+ this(typeInfo, genericUDF, null, children);
+ }
+
+ public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+ List<ExprNodeDesc> children) {
+ this(oi, genericUDF, null, children);
}
@Override
@@ -165,17 +184,20 @@ public class ExprNodeGenericFuncDesc ext
cloneCh.add(ch.clone());
}
ExprNodeGenericFuncDesc clone = new ExprNodeGenericFuncDesc(typeInfo,
- FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh);
+ FunctionRegistry.cloneGenericUDF(genericUDF), funcText, cloneCh);
return clone;
}
/**
- * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the
- * children parameters.
+ * Create an ExprNodeGenericFuncDesc based on the genericUDFClass and the
+ * children parameters. If the function has an explicit name, the
+ * newInstance method should be passed the function name in the funcText
+ * argument.
*
* @throws UDFArgumentException
*/
public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+ String funcText,
List<ExprNodeDesc> children) throws UDFArgumentException {
ObjectInspector[] childrenOIs = new ObjectInspector[children.size()];
for (int i = 0; i < childrenOIs.length; i++) {
@@ -232,7 +254,15 @@ public class ExprNodeGenericFuncDesc ext
}
}
- return new ExprNodeGenericFuncDesc(oi, genericUDF, children);
+ return new ExprNodeGenericFuncDesc(oi, genericUDF, funcText, children);
+ }
+
+ /* Backward-compatibility overload for the case where the function has
+ * no explicit name.
+ */
+ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+ List<ExprNodeDesc> children) throws UDFArgumentException {
+ return newInstance(genericUDF, null, children);
}
@Override
@@ -285,4 +315,8 @@ public class ExprNodeGenericFuncDesc ext
public void setSortedExpr(boolean isSortedExpr) {
this.isSortedExpr = isSortedExpr;
}
+
+ public String getFuncText() {
+ return this.funcText;
+ }
}
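
The funcText plumbing above records the user-visible name of a function (the text the user actually typed for func(args)) while keeping every pre-existing call site compiling, since the overloads without the String parameter delegate with null. A hedged sketch of the two entry points; the registry lookup and the ExprNodeConstantDesc child are illustrative scaffolding, not part of this commit:

    static void funcTextDemo() throws UDFArgumentException {
      GenericUDF udf = FunctionRegistry.getFunctionInfo("upper").getGenericUDF();
      List<ExprNodeDesc> kids = new ArrayList<ExprNodeDesc>();
      kids.add(new ExprNodeConstantDesc("hello"));

      // Parser-style call site: the user-visible name is known and recorded.
      ExprNodeGenericFuncDesc named =
          ExprNodeGenericFuncDesc.newInstance(udf, "upper", kids);
      assert "upper".equals(named.getFuncText());

      // Legacy call site: the two-argument overload leaves funcText null.
      ExprNodeGenericFuncDesc anon =
          ExprNodeGenericFuncDesc.newInstance(udf, kids);
      assert anon.getFuncText() == null;
    }

Note that funcText is declared transient, so it is advisory display/matching information and is not expected to survive plan serialization.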
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Tue Oct 1 04:48:44 2013
@@ -112,6 +112,9 @@ public class MapWork extends BaseWork {
private boolean useBucketizedHiveInputFormat;
+ private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+ private boolean vectorMode = false;
+
public MapWork() {
}
@@ -479,4 +482,21 @@ public class MapWork extends BaseWork {
PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
}
}
+
+ public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+ return scratchColumnVectorTypes;
+ }
+
+ public void setScratchColumnVectorTypes(
+ Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+ this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+ }
+
+ public boolean getVectorMode() {
+ return vectorMode;
+ }
+
+ public void setVectorMode(boolean vectorMode) {
+ this.vectorMode = vectorMode;
+ }
}
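
These two fields let the planner mark a MapWork as vectorized and carry the scratch-column type map that vectorized operators need at runtime. A hedged sketch of how a vectorizer pass might populate them on an existing MapWork instance; the outer map key and the type names are illustrative, since their exact semantics are not visible in this hunk:

    Map<Integer, String> scratchTypes = new HashMap<Integer, String>();
    scratchTypes.put(5, "long");    // scratch column 5 holds longs
    scratchTypes.put(6, "double");  // scratch column 6 holds doubles

    Map<String, Map<Integer, String>> byInput =
        new HashMap<String, Map<Integer, String>>();
    byInput.put("default.alltypesorc", scratchTypes);

    mapWork.setScratchColumnVectorTypes(byInput);
    mapWork.setVectorMode(true);

A boolean getter named getVectorMode rather than isVectorMode is unusual but harmless; bean-introspection tooling that expects the is-prefix may simply not pick it up.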
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java Tue Oct 1 04:48:44 2013
@@ -26,9 +26,9 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.PTFUtils;
+import org.apache.hadoop.hive.ql.parse.LeadLagInfo;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
-import org.apache.hadoop.hive.ql.parse.PTFTranslator.LeadLagInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.TypeCheckCtx;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java Tue Oct 1 04:48:44 2013
@@ -29,7 +29,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.PTFPartition;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.PTFTranslator.LeadLagInfo;
+import org.apache.hadoop.hive.ql.parse.LeadLagInfo;
import org.apache.hadoop.hive.ql.parse.WindowingExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.plan.PTFDesc.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Tue Oct 1 04:48:44 2013
@@ -207,7 +207,7 @@ public class SessionState {
}
private static final SimpleDateFormat DATE_FORMAT =
- new SimpleDateFormat("yyyyMMddHHmm");
+ new SimpleDateFormat("yyyyMMddHHmm");
public void setCmd(String cmdString) {
conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, cmdString);
@@ -462,7 +462,7 @@ public class SessionState {
} catch (IOException e) {
console.printError("Unable to validate " + newFile + "\nException: "
+ e.getMessage(), "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
return null;
}
}
@@ -479,7 +479,7 @@ public class SessionState {
} catch (Exception e) {
console.printError("Unable to register " + newJar + "\nException: "
+ e.getMessage(), "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
return false;
}
}
@@ -493,7 +493,7 @@ public class SessionState {
} catch (Exception e) {
console.printError("Unable to unregister " + jarsToUnregister
+ "\nException: " + e.getMessage(), "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
return false;
}
}
@@ -579,7 +579,7 @@ public class SessionState {
}
private final HashMap<ResourceType, Set<String>> resource_map =
- new HashMap<ResourceType, Set<String>>();
+ new HashMap<ResourceType, Set<String>>();
public String add_resource(ResourceType t, String value) {
// By default don't convert to unix
@@ -797,7 +797,7 @@ public class SessionState {
public void close() throws IOException {
File resourceDir =
- new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
+ new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
LOG.debug("Removing resource dir " + resourceDir);
try {
if (resourceDir.exists()) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java Tue Oct 1 04:48:44 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF.IUDFUnaryString;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -39,7 +40,7 @@ import org.apache.hadoop.io.Text;
+ " 'H1'\n"
+ " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n"
+ " '46616365626F6F6B'")
-public class UDFHex extends UDF {
+public class UDFHex extends UDF implements IUDFUnaryString {
private final Text result = new Text();
private byte[] value = new byte[16];
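
Implementing IUDFUnaryString lets the vectorized StringUnaryUDF expression drive this row-mode UDF one Text value at a time without reflection. A minimal sketch, assuming the interface exposes the single-argument evaluate(Text) overload that UDFHex already provides:

    IUDFUnaryString hex = new UDFHex();
    Text out = hex.evaluate(new Text("Facebook"));
    System.out.println(out);  // 46616365626F6F6B, matching the _FUNC_ example above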
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java Tue Oct 1 04:48:44 2013
@@ -27,95 +27,133 @@ import org.apache.hadoop.hive.ql.udf.gen
public enum JavaDataModel {
JAVA32 {
+ @Override
public int object() {
return JAVA32_OBJECT;
}
+ @Override
public int array() {
return JAVA32_ARRAY;
}
+ @Override
public int ref() {
return JAVA32_REF;
}
+ @Override
public int hashMap(int entry) {
// base = JAVA32_OBJECT + PRIMITIVES1 * 4 + JAVA32_FIELDREF * 3 + JAVA32_ARRAY;
// entry = JAVA32_OBJECT + JAVA32_FIELDREF + PRIMITIVES1
return 64 + 24 * entry;
}
+ @Override
+ public int hashMapEntry() {
+ return 24;
+ }
+
+ @Override
public int hashSet(int entry) {
// hashMap += JAVA32_OBJECT
return 80 + 24 * entry;
}
+ @Override
public int linkedHashMap(int entry) {
// hashMap += JAVA32_FIELDREF + PRIMITIVES1
// hashMap.entry += JAVA32_FIELDREF * 2
return 72 + 32 * entry;
}
+ @Override
public int linkedList(int entry) {
// base = JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_FIELDREF;
// entry = JAVA32_OBJECT + JAVA32_FIELDREF * 2
return 28 + 24 * entry;
}
+ @Override
public int arrayList() {
// JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_ARRAY;
return 44;
}
+
+ @Override
+ public int memoryAlign() {
+ return 8;
+ }
}, JAVA64 {
+ @Override
public int object() {
return JAVA64_OBJECT;
}
+ @Override
public int array() {
return JAVA64_ARRAY;
}
+ @Override
public int ref() {
return JAVA64_REF;
}
+ @Override
public int hashMap(int entry) {
// base = JAVA64_OBJECT + PRIMITIVES1 * 4 + JAVA64_FIELDREF * 3 + JAVA64_ARRAY;
// entry = JAVA64_OBJECT + JAVA64_FIELDREF + PRIMITIVES1
return 112 + 44 * entry;
}
+ @Override
+ public int hashMapEntry() {
+ return 44;
+ }
+
+ @Override
public int hashSet(int entry) {
// hashMap += JAVA64_OBJECT
return 144 + 44 * entry;
}
+ @Override
public int linkedHashMap(int entry) {
// hashMap += JAVA64_FIELDREF + PRIMITIVES1
// hashMap.entry += JAVA64_FIELDREF * 2
return 128 + 60 * entry;
}
+ @Override
public int linkedList(int entry) {
// base = JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_FIELDREF;
// entry = JAVA64_OBJECT + JAVA64_FIELDREF * 2
return 48 + 48 * entry;
}
+ @Override
public int arrayList() {
// JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_ARRAY;
return 80;
}
+
+ @Override
+ public int memoryAlign() {
+ return 8;
+ }
};
public abstract int object();
public abstract int array();
public abstract int ref();
public abstract int hashMap(int entry);
+ public abstract int hashMapEntry();
public abstract int hashSet(int entry);
public abstract int linkedHashMap(int entry);
public abstract int linkedList(int entry);
public abstract int arrayList();
+ public abstract int memoryAlign();
// ascii string
public int lengthFor(String string) {
@@ -161,6 +199,10 @@ public enum JavaDataModel {
return PRIMITIVES2;
}
+ public static int alignUp(int value, int align) {
+ return (value + align - 1) & ~(align - 1);
+ }
+
public static final int JAVA32_META = 12;
public static final int JAVA32_ARRAY_META = 16;
public static final int JAVA32_REF = 4;
@@ -176,6 +218,8 @@ public enum JavaDataModel {
public static final int PRIMITIVES1 = 4; // void, boolean, byte, short, int, float
public static final int PRIMITIVES2 = 8; // long, double
+ public static final int PRIMITIVE_BYTE = 1; // byte
+
private static JavaDataModel current;
public static JavaDataModel get() {
@@ -200,4 +244,27 @@ public enum JavaDataModel {
}
return ((size + 8) >> 3) << 3;
}
+
+ private int lengthForPrimitiveArrayOfSize(int primitiveSize, int length) {
+ return alignUp(array() + primitiveSize*length, memoryAlign());
+ }
+
+ public int lengthForByteArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
+ }
+ public int lengthForObjectArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(ref(), length);
+ }
+ public int lengthForLongArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(primitive2(), length);
+ }
+ public int lengthForDoubleArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(primitive2(), length);
+ }
+ public int lengthForIntArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(primitive1(), length);
+ }
+ public int lengthForBooleanArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
+ }
}
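
alignUp rounds a size up to the next multiple of a power-of-two alignment using the standard mask trick: alignUp(17, 8) == (17 + 7) & ~7 == 24, while an already-aligned value passes through unchanged. A small worked check of the new array-length helpers (absolute values depend on which data model is detected, so only the relationships are asserted):

    JavaDataModel model = JavaDataModel.get();
    int raw    = model.array() + 100;  // array header plus 100 one-byte elements
    int padded = JavaDataModel.alignUp(raw, model.memoryAlign());
    assert padded == model.lengthForByteArrayOfSize(100);
    assert padded % 8 == 0;  // both models report an 8-byte memoryAlign()

Note that lengthForObjectArrayOfSize reuses the private helper with ref() as the element size, so lengthForPrimitiveArrayOfSize is a slight misnomer: it really means "array of fixed-size elements".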
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Tue Oct 1 04:48:44 2013
@@ -59,12 +59,15 @@ import org.apache.hadoop.hive.cli.CliDri
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.common.io.CachingPrintStream;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
+import org.apache.hadoop.hive.ql.exec.vector.util.AllVectorTypesRecord;
+import org.apache.hadoop.hive.ql.exec.vector.util.OrcFileGenerator;
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -108,7 +111,7 @@ public class QTestUtil {
public static final HashSet<String> srcTables = new HashSet<String>
(Arrays.asList(new String [] {
"src", "src1", "srcbucket", "srcbucket2", "src_json", "src_thrift",
- "src_sequencefile", "srcpart"
+ "src_sequencefile", "srcpart", "alltypesorc"
}));
private ParseDriver pd;
@@ -221,6 +224,11 @@ public class QTestUtil {
convertPathsFromWindowsToHdfs();
}
+ String vectorizationEnabled = System.getProperty("test.vectorization.enabled");
+ if(vectorizationEnabled != null && vectorizationEnabled.equalsIgnoreCase("true")) {
+ conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, true);
+ }
+
// Plug verifying metastore in for testing.
conf.setVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
"org.apache.hadoop.hive.metastore.VerifyingObjectStore");
@@ -260,6 +268,11 @@ public class QTestUtil {
String orgScratchDir = conf.getVar(HiveConf.ConfVars.SCRATCHDIR);
conf.setVar(HiveConf.ConfVars.SCRATCHDIR, getHdfsUriString(orgScratchDir));
+
+ if (miniMr) {
+ String orgAuxJarFolder = conf.getAuxJars();
+ conf.setAuxJars(getHdfsUriString("file://" + orgAuxJarFolder));
+ }
}
private String getHdfsUriString(String uriStr) {
@@ -516,7 +529,8 @@ public class QTestUtil {
for (String s : new String[] {"src", "src1", "src_json", "src_thrift",
"src_sequencefile", "srcpart", "srcbucket", "srcbucket2", "dest1",
"dest2", "dest3", "dest4", "dest4_sequencefile", "dest_j1", "dest_j2",
- "dest_g1", "dest_g2", "fetchtask_ioexception"}) {
+ "dest_g1", "dest_g2", "fetchtask_ioexception",
+ AllVectorTypesRecord.TABLE_NAME}) {
db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, s);
}
@@ -658,7 +672,17 @@ public class QTestUtil {
fpath = new Path(testFiles, "json.txt");
runLoadCmd("LOAD DATA LOCAL INPATH '" + fpath.toUri().getPath()
+ "' INTO TABLE src_json");
+
+ FileSystem localFs = FileSystem.getLocal(conf);
+ // create and load data into the ORC table
+ fpath = new Path(testFiles, AllVectorTypesRecord.TABLE_NAME);
+
+ runCreateTableCmd(AllVectorTypesRecord.TABLE_CREATE_COMMAND);
+ runLoadCmd("LOAD DATA LOCAL INPATH '" + fpath.toUri().getPath()
+ + "' INTO TABLE "+AllVectorTypesRecord.TABLE_NAME);
+
conf.setBoolean("hive.test.init.phase", false);
+
}
public void init() throws Exception {
@@ -1068,6 +1092,10 @@ public class QTestUtil {
in = new BufferedReader(new FileReader(fname));
out = new BufferedWriter(new FileWriter(fname + ".orig"));
while (null != (line = in.readLine())) {
+ // Ignore empty lines on Windows
+ if(line.isEmpty() && Shell.WINDOWS) {
+ continue;
+ }
out.write(line);
out.write('\n');
}
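
The test.vectorization.enabled toggle added above is a plain system-property check, which keeps default q-test runs unchanged while allowing an opt-in vectorized pass. An equivalent, slightly tighter form, offered only as a sketch (Boolean.parseBoolean is null-safe and matches "true" case-insensitively):

    if (Boolean.parseBoolean(System.getProperty("test.vectorization.enabled"))) {
      conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    }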
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Oct 1 04:48:44 2013
@@ -41,7 +41,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.FSRecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
@@ -521,7 +521,7 @@ public class TestInputOutputFormat {
}
SerDe serde = new OrcSerde();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
- FileSinkOperator.RecordWriter writer =
+ FSRecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.write(serde.serialize(new MyRow(1,2), inspector));
@@ -686,7 +686,7 @@ public class TestInputOutputFormat {
JobConf job = new JobConf(conf);
Properties properties = new Properties();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
- FileSinkOperator.RecordWriter writer =
+ FSRecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.close(true);
@@ -731,7 +731,7 @@ public class TestInputOutputFormat {
}
SerDe serde = new OrcSerde();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
- FileSinkOperator.RecordWriter writer =
+ FSRecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
true, properties, Reporter.NULL);
writer.write(serde.serialize(new StringRow("owen"), inspector));
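
With the writer type narrowed to the standalone FSRecordWriter interface, these tests no longer import FileSinkOperator just for its nested RecordWriter. The whole write path, condensed from the tests above (conf, testFilePath, properties, and inspector as set up there):

    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FSRecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.write(serde.serialize(new MyRow(1, 2), inspector));
    writer.close(true);  // boolean close flag, as used in the tests above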
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/udf/Rot13OutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/udf/Rot13OutputFormat.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/udf/Rot13OutputFormat.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/udf/Rot13OutputFormat.java Tue Oct 1 04:48:44 2013
@@ -1,7 +1,10 @@
package org.apache.hadoop.hive.ql.io.udf;
+import java.io.IOException;
+import java.util.Properties;
+
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.io.FSRecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
@@ -11,27 +14,24 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
-import java.io.IOException;
-import java.util.Properties;
-
public class Rot13OutputFormat
extends HiveIgnoreKeyTextOutputFormat<LongWritable,Text> {
@Override
- public RecordWriter
+ public FSRecordWriter
getHiveRecordWriter(JobConf jc,
Path outPath,
Class<? extends Writable> valueClass,
boolean isCompressed,
Properties tableProperties,
Progressable progress) throws IOException {
- final RecordWriter result =
+ final FSRecordWriter result =
super.getHiveRecordWriter(jc,outPath,valueClass,isCompressed,
tableProperties,progress);
final Reporter reporter = (Reporter) progress;
reporter.setStatus("got here");
System.out.println("Got a reporter " + reporter);
- return new RecordWriter() {
+ return new FSRecordWriter() {
@Override
public void write(Writable w) throws IOException {
if (w instanceof Text) {
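
The anonymous class above is a decorator: it wraps the writer obtained from super.getHiveRecordWriter and transforms each record before delegating. A generic named sketch of the same pattern (Rot13Writer is hypothetical, and the interface is assumed to carry the write(Writable)/close(boolean) pair the tests exercise):

    class Rot13Writer implements FSRecordWriter {
      private final FSRecordWriter delegate;

      Rot13Writer(FSRecordWriter delegate) {
        this.delegate = delegate;
      }

      @Override
      public void write(Writable w) throws IOException {
        // Transform Text records; pass everything else through untouched.
        delegate.write(w instanceof Text ? rot13((Text) w) : w);
      }

      @Override
      public void close(boolean abort) throws IOException {
        delegate.close(abort);
      }

      private static Text rot13(Text t) {
        char[] cs = t.toString().toCharArray();
        for (int i = 0; i < cs.length; i++) {
          char c = cs[i];
          if (c >= 'a' && c <= 'z') {
            cs[i] = (char) ('a' + (c - 'a' + 13) % 26);
          } else if (c >= 'A' && c <= 'Z') {
            cs[i] = (char) ('A' + (c - 'A' + 13) % 26);
          }
        }
        return new Text(new String(cs));
      }
    }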
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java Tue Oct 1 04:48:44 2013
@@ -25,6 +25,6 @@ public class TestToInteger extends TestC
Text t4 = new Text("1.1");
IntWritable i4 = ti.evaluate(t4);
- assertNull(i4);
+ assertEquals(1, i4.get());
}
}
Modified: hive/branches/tez/ql/src/test/queries/clientnegative/deletejar.q
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/queries/clientnegative/deletejar.q?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/queries/clientnegative/deletejar.q (original)
+++ hive/branches/tez/ql/src/test/queries/clientnegative/deletejar.q Tue Oct 1 04:48:44 2013
@@ -1,4 +1,4 @@
-ADD JAR ../data/files/TestSerDe.jar;
-DELETE JAR ../data/files/TestSerDe.jar;
+ADD JAR ../build/ql/test/TestSerDe.jar;
+DELETE JAR ../build/ql/test/TestSerDe.jar;
CREATE TABLE DELETEJAR(KEY STRING, VALUE STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' STORED AS TEXTFILE;
Modified: hive/branches/tez/ql/src/test/queries/clientnegative/invalid_columns.q
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/queries/clientnegative/invalid_columns.q?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/queries/clientnegative/invalid_columns.q (original)
+++ hive/branches/tez/ql/src/test/queries/clientnegative/invalid_columns.q Tue Oct 1 04:48:44 2013
@@ -1,4 +1,4 @@
-ADD JAR ../data/files/TestSerDe.jar;
+ADD JAR ../build/ql/test/TestSerDe.jar;
CREATE TABLE DELETEJAR(KEY STRING, VALUE STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.TestSerDe'
STORED AS TEXTFILE
TBLPROPERTIES('columns'='valid_colname,invalid.colname');
Modified: hive/branches/tez/ql/src/test/queries/clientpositive/alter1.q
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/queries/clientpositive/alter1.q?rev=1527883&r1=1527882&r2=1527883&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/queries/clientpositive/alter1.q (original)
+++ hive/branches/tez/ql/src/test/queries/clientpositive/alter1.q Tue Oct 1 04:48:44 2013
@@ -15,7 +15,7 @@ describe extended alter1;
alter table alter1 set serdeproperties('s1'='10', 's2' ='20');
describe extended alter1;
-add jar ../data/files/TestSerDe.jar;
+add jar ../build/ql/test/TestSerDe.jar;
alter table alter1 set serde 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties('s1'='9');
describe extended alter1;
@@ -56,7 +56,7 @@ DESCRIBE EXTENDED alter1;
ALTER TABLE alter1 SET SERDEPROPERTIES('s1'='10', 's2' ='20');
DESCRIBE EXTENDED alter1;
-add jar ../data/files/TestSerDe.jar;
+add jar ../build/ql/test/TestSerDe.jar;
ALTER TABLE alter1 SET SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' WITH SERDEPROPERTIES ('s1'='9');
DESCRIBE EXTENDED alter1;