Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/12 19:23:14 UTC
svn commit: r1541190 [5/15] - in /hive/branches/tez: ./
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/ cli/
cli/src/java/org/apache/hadoop/hive/cli/ common/
common/src/java/org/apache/hadoop/hive/common/ common/s...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Nov 12 18:23:05 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -54,6 +55,8 @@ final class ReaderImpl implements Reader
private final CompressionKind compressionKind;
private final CompressionCodec codec;
private final int bufferSize;
+ private OrcProto.Metadata metadata = null;
+ private final int metadataSize;
private final OrcProto.Footer footer;
private final ObjectInspector inspector;
private long deserializedSize = -1;
@@ -294,12 +297,14 @@ final class ReaderImpl implements Reader
FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path);
MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize, footerMetaData.footerBuffer);
+ footerMetaData.bufferSize, footerMetaData.metadataSize, footerMetaData.footerBuffer);
this.footerByteBuffer = footerMetaData.footerBuffer;
this.compressionKind = rInfo.compressionKind;
this.codec = rInfo.codec;
this.bufferSize = rInfo.bufferSize;
+ this.metadataSize = rInfo.metadataSize;
+ this.metadata = rInfo.metadata;
this.footer = rInfo.footer;
this.inspector = rInfo.inspector;
}
@@ -321,12 +326,15 @@ final class ReaderImpl implements Reader
MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
fMetaInfo.compressionType,
fMetaInfo.bufferSize,
+ fMetaInfo.metadataSize,
fMetaInfo.footerBuffer
);
this.footerByteBuffer = fMetaInfo.footerBuffer;
this.compressionKind = rInfo.compressionKind;
this.codec = rInfo.codec;
this.bufferSize = rInfo.bufferSize;
+ this.metadataSize = rInfo.metadataSize;
+ this.metadata = rInfo.metadata;
this.footer = rInfo.footer;
this.inspector = rInfo.inspector;
}
@@ -354,6 +362,9 @@ final class ReaderImpl implements Reader
checkOrcVersion(LOG, path, ps.getVersionList());
+ int footerSize = (int) ps.getFooterLength();
+ int metadataSize = (int) ps.getMetadataLength();
+
//check compression codec
switch (ps.getCompression()) {
case NONE:
@@ -368,11 +379,8 @@ final class ReaderImpl implements Reader
throw new IllegalArgumentException("Unknown compression");
}
- //get footer size
- int footerSize = (int) ps.getFooterLength();
-
//check if extra bytes need to be read
- int extra = Math.max(0, psLen + 1 + footerSize - readSize);
+ int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
if (extra > 0) {
//more bytes need to be read, seek back to the right place and read extra bytes
file.seek(size - readSize - extra);
@@ -384,22 +392,24 @@ final class ReaderImpl implements Reader
extraBuf.put(buffer);
buffer = extraBuf;
buffer.position(0);
- buffer.limit(footerSize);
+ buffer.limit(footerSize + metadataSize);
} else {
//footer is already in the bytes in buffer, just adjust position, length
- buffer.position(psOffset - footerSize);
+ buffer.position(psOffset - footerSize - metadataSize);
buffer.limit(psOffset);
}
- file.close();
- //mark this position so that we can call reset() to get back to it
+
+ // remember position for later
buffer.mark();
+ file.close();
+
return new FileMetaInfo(
ps.getCompression().toString(),
(int) ps.getCompressionBlockSize(),
+ (int) ps.getMetadataLength(),
buffer
);
-
}
@@ -415,23 +425,40 @@ final class ReaderImpl implements Reader
final CompressionKind compressionKind;
final CompressionCodec codec;
final int bufferSize;
+ final int metadataSize;
+ final OrcProto.Metadata metadata;
final OrcProto.Footer footer;
final ObjectInspector inspector;
- MetaInfoObjExtractor(String codecStr, int bufferSize, ByteBuffer footerBuffer) throws IOException {
+ MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
+ ByteBuffer footerBuffer) throws IOException {
+
this.compressionKind = CompressionKind.valueOf(codecStr);
this.bufferSize = bufferSize;
this.codec = WriterImpl.createCodec(compressionKind);
- int footerBufferSize = footerBuffer.limit() - footerBuffer.position();
- InputStream instream = InStream.create("footer", new ByteBuffer[]{footerBuffer},
+ this.metadataSize = metadataSize;
+
+ int position = footerBuffer.position();
+ int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
+ footerBuffer.limit(position + metadataSize);
+
+ InputStream instream = InStream.create("metadata", new ByteBuffer[]{footerBuffer},
+ new long[]{0L}, metadataSize, codec, bufferSize);
+ this.metadata = OrcProto.Metadata.parseFrom(instream);
+
+ footerBuffer.position(position + metadataSize);
+ footerBuffer.limit(position + metadataSize + footerBufferSize);
+ instream = InStream.create("footer", new ByteBuffer[]{footerBuffer},
new long[]{0L}, footerBufferSize, codec, bufferSize);
this.footer = OrcProto.Footer.parseFrom(instream);
+
+ footerBuffer.position(position);
this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
}
}
public FileMetaInfo getFileMetaInfo(){
- return new FileMetaInfo(compressionKind.toString(), bufferSize, footerByteBuffer);
+ return new FileMetaInfo(compressionKind.toString(), bufferSize, metadataSize, footerByteBuffer);
}
@@ -451,6 +478,13 @@ final class ReaderImpl implements Reader
public RecordReader rows(long offset, long length, boolean[] include,
SearchArgument sarg, String[] columnNames
) throws IOException {
+
+ // if included columns is null, then include all columns
+ if (include == null) {
+ include = new boolean[footer.getTypesCount()];
+ Arrays.fill(include, true);
+ }
+
return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
length, footer.getTypesList(), codec, bufferSize,
include, footer.getRowIndexStride(), sarg, columnNames);
@@ -576,4 +610,9 @@ final class ReaderImpl implements Reader
return Collections.max(indices);
}
+ @Override
+ public Metadata getMetadata() throws IOException {
+ return new Metadata(metadata);
+ }
+
}
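
For context (an illustration, not part of this commit): with the new metadata section the ORC file tail is laid out as [metadata][footer][postscript][1-byte postscript length], and the reader carves a single buffered read into a metadata slice and a footer slice purely by moving the buffer's position and limit, as MetaInfoObjExtractor does above. A minimal standalone sketch of that arithmetic with java.nio.ByteBuffer, using made-up byte strings and sizes in place of the real metadataSize/footerSize:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class TailSliceSketch {
  public static void main(String[] args) {
    // Pretend tail read: 8 bytes of "metadata", 10 bytes of "footer",
    // followed by postscript bytes that we ignore here.
    byte[] tail = "METADATAFOOTER0123PSBYTES".getBytes(StandardCharsets.US_ASCII);
    int metadataSize = 8;                      // from PostScript.getMetadataLength()
    int footerSize = 10;                       // from PostScript.getFooterLength()
    int psOffset = metadataSize + footerSize;  // where the postscript starts

    ByteBuffer buffer = ByteBuffer.wrap(tail);
    // Cover metadata + footer with [position, limit), mirroring extractMetaInfoFromFooter().
    buffer.position(psOffset - footerSize - metadataSize);
    buffer.limit(psOffset);

    int position = buffer.position();
    // First slice: the metadata section.
    buffer.limit(position + metadataSize);
    byte[] metadata = new byte[metadataSize];
    buffer.get(metadata);
    // Second slice: the footer section, mirroring MetaInfoObjExtractor.
    buffer.position(position + metadataSize);
    buffer.limit(position + metadataSize + footerSize);
    byte[] footer = new byte[footerSize];
    buffer.get(footer);
    // Restore the original position so later consumers see the untouched tail.
    buffer.position(position);

    System.out.println(new String(metadata, StandardCharsets.US_ASCII)); // METADATA
    System.out.println(new String(footer, StandardCharsets.US_ASCII));   // FOOTER0123
  }
}

The final position reset corresponds to footerBuffer.position(position) at the end of the extractor, so the cached FileMetaInfo buffer can be re-read later.
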
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Nov 12 18:23:05 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -64,6 +65,7 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedOutputStream;
@@ -125,7 +127,6 @@ class WriterImpl implements Writer, Memo
private final boolean buildIndex;
private final MemoryManager memoryManager;
private final OrcFile.Version version;
-
private final Configuration conf;
WriterImpl(FileSystem fs,
@@ -411,6 +412,7 @@ class WriterImpl implements Writer, Memo
private final BitFieldWriter isPresent;
private final boolean isCompressed;
protected final ColumnStatisticsImpl indexStatistics;
+ protected final ColumnStatisticsImpl stripeColStatistics;
private final ColumnStatisticsImpl fileStatistics;
protected TreeWriter[] childrenWriters;
protected final RowIndexPositionRecorder rowIndexPosition;
@@ -419,7 +421,7 @@ class WriterImpl implements Writer, Memo
private final PositionedOutputStream rowIndexStream;
private boolean foundNulls;
private OutStream isPresentOutStream;
- protected final boolean useDirectV2Encoding;
+ private final List<StripeStatistics.Builder> stripeStatsBuilders;
/**
* Create a tree writer.
@@ -435,7 +437,6 @@ class WriterImpl implements Writer, Memo
this.isCompressed = streamFactory.isCompressed();
this.id = columnId;
this.inspector = inspector;
- this.useDirectV2Encoding = true;
if (nullable) {
isPresentOutStream = streamFactory.createStream(id,
OrcProto.Stream.Kind.PRESENT);
@@ -445,11 +446,13 @@ class WriterImpl implements Writer, Memo
}
this.foundNulls = false;
indexStatistics = ColumnStatisticsImpl.create(inspector);
+ stripeColStatistics = ColumnStatisticsImpl.create(inspector);
fileStatistics = ColumnStatisticsImpl.create(inspector);
childrenWriters = new TreeWriter[0];
rowIndex = OrcProto.RowIndex.newBuilder();
rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
+ stripeStatsBuilders = Lists.newArrayList();
if (streamFactory.buildIndex()) {
rowIndexStream = streamFactory.createStream(id,
OrcProto.Stream.Kind.ROW_INDEX);
@@ -462,6 +465,10 @@ class WriterImpl implements Writer, Memo
return rowIndex;
}
+ protected ColumnStatisticsImpl getStripeStatistics() {
+ return stripeColStatistics;
+ }
+
protected ColumnStatisticsImpl getFileStatistics() {
return fileStatistics;
}
@@ -537,6 +544,12 @@ class WriterImpl implements Writer, Memo
}
}
+ // merge stripe-level column statistics to file statistics and write it to
+ // stripe statistics
+ OrcProto.StripeStatistics.Builder stripeStatsBuilder = OrcProto.StripeStatistics.newBuilder();
+ writeStripeStatistics(stripeStatsBuilder, this);
+ stripeStatsBuilders.add(stripeStatsBuilder);
+
// reset the flag for next stripe
foundNulls = false;
@@ -554,6 +567,16 @@ class WriterImpl implements Writer, Memo
rowIndexEntry.clear();
}
+ private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
+ TreeWriter treeWriter) {
+ treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics);
+ builder.addColStats(treeWriter.stripeColStatistics.serialize().build());
+ treeWriter.stripeColStatistics.reset();
+ for (TreeWriter child : treeWriter.getChildrenWriters()) {
+ writeStripeStatistics(builder, child);
+ }
+ }
+
TreeWriter[] getChildrenWriters() {
return childrenWriters;
}
@@ -575,7 +598,7 @@ class WriterImpl implements Writer, Memo
* @throws IOException
*/
void createRowIndexEntry() throws IOException {
- fileStatistics.merge(indexStatistics);
+ stripeColStatistics.merge(indexStatistics);
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
rowIndex.addEntry(rowIndexEntry);
@@ -1013,7 +1036,7 @@ class WriterImpl implements Writer, Memo
*/
@Override
void createRowIndexEntry() throws IOException {
- getFileStatistics().merge(indexStatistics);
+ getStripeStatistics().merge(indexStatistics);
OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
@@ -1851,6 +1874,21 @@ class WriterImpl implements Writer, Memo
}
}
+ private int writeMetadata(long bodyLength) throws IOException {
+ ensureWriter();
+ OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder();
+ for(OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) {
+ builder.addStripeStats(ssb.build());
+ }
+
+ long startPosn = rawWriter.getPos();
+ OrcProto.Metadata metadata = builder.build();
+ metadata.writeTo(protobufWriter);
+ protobufWriter.flush();
+ writer.flush();
+ return (int) (rawWriter.getPos() - startPosn);
+ }
+
private int writeFooter(long bodyLength) throws IOException {
ensureWriter();
OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
@@ -1881,11 +1919,12 @@ class WriterImpl implements Writer, Memo
return (int) (rawWriter.getPos() - startPosn);
}
- private int writePostScript(int footerLength) throws IOException {
+ private int writePostScript(int footerLength, int metadataLength) throws IOException {
OrcProto.PostScript.Builder builder =
OrcProto.PostScript.newBuilder()
.setCompression(writeCompressionKind(compress))
.setFooterLength(footerLength)
+ .setMetadataLength(metadataLength)
.setMagic(OrcFile.MAGIC)
.addVersion(version.getMajor())
.addVersion(version.getMinor());
@@ -1940,8 +1979,9 @@ class WriterImpl implements Writer, Memo
// actually close the file
synchronized (this) {
flushStripe();
- int footerLength = writeFooter(rawWriter.getPos());
- rawWriter.writeByte(writePostScript(footerLength));
+ int metadataLength = writeMetadata(rawWriter.getPos());
+ int footerLength = writeFooter(rawWriter.getPos() - metadataLength);
+ rawWriter.writeByte(writePostScript(footerLength, metadataLength));
rawWriter.close();
}
}
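
For illustration (not part of the diff): close() now writes the tail in the order metadata, footer, postscript, and each section's length is simply the difference of rawWriter.getPos() before and after writing it; the footer's bodyLength argument subtracts metadataLength so the content length still covers only the stripe data. A self-contained sketch of that bookkeeping, with a ByteArrayOutputStream standing in for rawWriter and hypothetical payload strings standing in for the protobuf messages:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class TailLengthSketch {
  private final ByteArrayOutputStream rawWriter = new ByteArrayOutputStream();

  private long getPos() { return rawWriter.size(); }

  private void write(String s) throws IOException {
    rawWriter.write(s.getBytes(StandardCharsets.US_ASCII));
  }

  // Returns the number of bytes the metadata section occupies.
  private int writeMetadata() throws IOException {
    long start = getPos();
    write("STRIPE-STATS-METADATA");
    return (int) (getPos() - start);
  }

  // bodyLength covers only the stripe data, excluding the metadata section.
  private int writeFooter(long bodyLength) throws IOException {
    long start = getPos();
    write("FOOTER(contentLength=" + bodyLength + ")");
    return (int) (getPos() - start);
  }

  public void close() throws IOException {
    write("STRIPES.........");                   // stand-in for the flushed stripe data
    int metadataLength = writeMetadata();
    int footerLength = writeFooter(getPos() - metadataLength);
    // The postscript records both lengths so the reader can slice the tail.
    write("PS(footer=" + footerLength + ",metadata=" + metadataLength + ")");
    System.out.println(new String(rawWriter.toByteArray(), StandardCharsets.US_ASCII));
  }

  public static void main(String[] args) throws IOException {
    new TailLengthSketch().close();
  }
}
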
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java Tue Nov 12 18:23:05 2013
@@ -91,8 +91,7 @@ public class RCFileMergeMapper extends M
updatePaths(tmpPath, taskTmpPath);
try {
fs = (new Path(specPath)).getFileSystem(job);
- autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs,
- outPath);
+ autoDelete = fs.deleteOnExit(outPath);
} catch (IOException e) {
this.exception = true;
throw new RuntimeException(e);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java Tue Nov 12 18:23:05 2013
@@ -84,8 +84,7 @@ public class ColumnTruncateMapper extend
updatePaths(tmpPath, taskTmpPath);
try {
fs = (new Path(specPath)).getFileSystem(job);
- autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs,
- outPath);
+ autoDelete = fs.deleteOnExit(outPath);
} catch (IOException e) {
this.exception = true;
throw new RuntimeException(e);
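
Both mapper changes above are the same simplification: FileSystem.deleteOnExit(Path) is called directly instead of going through the Hadoop shims, presumably because the method behaves the same on every Hadoop version Hive still supports. A small hedged example of the direct call (the path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path outPath = new Path("/tmp/hive_example_tmp");  // placeholder for the task output path
    FileSystem fs = outPath.getFileSystem(conf);
    // Registers the path for deletion when the FileSystem is closed or the JVM exits;
    // returns false if the path does not exist.
    boolean autoDelete = fs.deleteOnExit(outPath);
    System.out.println("registered for delete-on-exit: " + autoDelete);
  }
}
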
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Nov 12 18:23:05 2013
@@ -1567,8 +1567,8 @@ private void constructOneLBLocationMap(F
if (tpartParams == null) {
return;
}
- List<String> statTypes = StatsSetupConst.getSupportedStats();
- for (String statType : statTypes) {
+
+ for (String statType : StatsSetupConst.supportedStats) {
tpartParams.remove(statType);
}
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java Tue Nov 12 18:23:05 2013
@@ -260,7 +260,7 @@ public class JsonMetaDataFormatter imple
// in case all files in locations do not exist
try {
FileStatus tmpStatus = fs.getFileStatus(tblPath);
- lastAccessTime = ShimLoader.getHadoopShims().getAccessTime(tmpStatus);
+ lastAccessTime = tmpStatus.getAccessTime();
lastUpdateTime = tmpStatus.getModificationTime();
} catch (IOException e) {
LOG.warn(
@@ -273,7 +273,7 @@ public class JsonMetaDataFormatter imple
try {
FileStatus status = fs.getFileStatus(tblPath);
FileStatus[] files = fs.listStatus(loc);
- long accessTime = ShimLoader.getHadoopShims().getAccessTime(status);
+ long accessTime = status.getAccessTime();
long updateTime = status.getModificationTime();
// no matter loc is the table location or part location, it must be a
// directory.
@@ -299,8 +299,7 @@ public class JsonMetaDataFormatter imple
if (fileLen < minFileSize) {
minFileSize = fileLen;
}
- accessTime = ShimLoader.getHadoopShims().getAccessTime(
- currentStatus);
+ accessTime = currentStatus.getAccessTime();
updateTime = currentStatus.getModificationTime();
if (accessTime > lastAccessTime) {
lastAccessTime = accessTime;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java Tue Nov 12 18:23:05 2013
@@ -278,7 +278,7 @@ class TextMetaDataFormatter implements M
// in case all files in locations do not exist
try {
FileStatus tmpStatus = fs.getFileStatus(tblPath);
- lastAccessTime = ShimLoader.getHadoopShims().getAccessTime(tmpStatus);
+ lastAccessTime = tmpStatus.getAccessTime();
lastUpdateTime = tmpStatus.getModificationTime();
if (partSpecified) {
// check whether the part exists or not in fs
@@ -295,7 +295,7 @@ class TextMetaDataFormatter implements M
try {
FileStatus status = fs.getFileStatus(tblPath);
FileStatus[] files = fs.listStatus(loc);
- long accessTime = ShimLoader.getHadoopShims().getAccessTime(status);
+ long accessTime = status.getAccessTime();
long updateTime = status.getModificationTime();
// no matter loc is the table location or part location, it must be a
// directory.
@@ -321,8 +321,7 @@ class TextMetaDataFormatter implements M
if (fileLen < minFileSize) {
minFileSize = fileLen;
}
- accessTime = ShimLoader.getHadoopShims().getAccessTime(
- currentStatus);
+ accessTime = currentStatus.getAccessTime();
updateTime = currentStatus.getModificationTime();
if (accessTime > lastAccessTime) {
lastAccessTime = accessTime;
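
The two formatter files follow the same pattern: FileStatus exposes getAccessTime() and getModificationTime() directly, so the shim indirection is dropped. A brief example (placeholder path; access time may be 0 on file systems that do not track it):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AccessTimeSketch {
  public static void main(String[] args) throws Exception {
    Path tblPath = new Path("/tmp");  // placeholder for the table location
    FileSystem fs = tblPath.getFileSystem(new Configuration());
    FileStatus status = fs.getFileStatus(tblPath);
    long lastAccessTime = status.getAccessTime();        // millis since epoch, 0 if not tracked
    long lastUpdateTime = status.getModificationTime();  // millis since epoch
    System.out.println(lastAccessTime + " / " + lastUpdateTime);
  }
}
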
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java Tue Nov 12 18:23:05 2013
@@ -122,11 +122,6 @@ public class LimitPushdownOptimizer impl
}
}
if (rs != null) {
- List<List<Integer>> distincts = rs.getConf().getDistinctColumnIndices();
- if (distincts != null && distincts.size() > 1) {
- // multi distinct case. can not sure that it's safe just by multiplying limit value
- return false;
- }
LimitOperator limit = (LimitOperator) nd;
rs.getConf().setTopN(limit.getConf().getLimit());
rs.getConf().setTopNMemoryUsage(((LimitPushdownContext) procCtx).threshold);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java Tue Nov 12 18:23:05 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.optimizer;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@@ -25,6 +26,8 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -37,7 +40,11 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.metadata.InputEstimator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -53,6 +60,8 @@ import org.apache.hadoop.hive.ql.plan.Op
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
/**
* Tries to convert simple fetch query to single fetch task, which fetches rows directly
@@ -75,7 +84,7 @@ public class SimpleFetchOptimizer implem
if (fetchTask != null) {
pctx.setFetchTask(fetchTask);
}
- } catch (HiveException e) {
+ } catch (Exception e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
@@ -92,13 +101,13 @@ public class SimpleFetchOptimizer implem
// returns non-null FetchTask instance when succeeded
@SuppressWarnings("unchecked")
private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source)
- throws HiveException {
+ throws Exception {
String mode = HiveConf.getVar(
pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
boolean aggressive = "more".equals(mode);
FetchData fetch = checkTree(aggressive, pctx, alias, source);
- if (fetch != null) {
+ if (fetch != null && checkThreshold(fetch, pctx)) {
int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
FetchWork fetchWork = fetch.convertToWork();
FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf());
@@ -110,6 +119,21 @@ public class SimpleFetchOptimizer implem
return null;
}
+ private boolean checkThreshold(FetchData data, ParseContext pctx) throws Exception {
+ long threshold = HiveConf.getLongVar(pctx.getConf(),
+ HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
+ if (threshold < 0) {
+ return true;
+ }
+ long remaining = threshold;
+ remaining -= data.getInputLength(pctx, remaining);
+ if (remaining < 0) {
+ LOG.info("Threshold " + remaining + " exceeded for pseudoMR mode");
+ return false;
+ }
+ return true;
+ }
+
// all we can handle is LimitOperator, FilterOperator SelectOperator and final FS
//
// for non-aggressive mode (minimal)
@@ -144,7 +168,8 @@ public class SimpleFetchOptimizer implem
PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
if (aggressive || !pruned.hasUnknownPartitions()) {
bypassFilter &= !pruned.hasUnknownPartitions();
- return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter);
+ return checkOperators(new FetchData(table, pruned, splitSample), ts,
+ aggressive, bypassFilter);
}
}
return null;
@@ -171,6 +196,7 @@ public class SimpleFetchOptimizer implem
}
}
if (op instanceof FileSinkOperator) {
+ fetch.scanOp = ts;
fetch.fileSink = op;
return fetch;
}
@@ -184,6 +210,9 @@ public class SimpleFetchOptimizer implem
private final PrunedPartitionList partsList;
private final HashSet<ReadEntity> inputs = new HashSet<ReadEntity>();
+ // source table scan
+ private TableScanOperator scanOp;
+
// this is always non-null when conversion is completed
private Operator<?> fileSink;
@@ -193,15 +222,15 @@ public class SimpleFetchOptimizer implem
this.splitSample = splitSample;
}
- private FetchData(PrunedPartitionList partsList, SplitSample splitSample) {
- this.table = null;
+ private FetchData(Table table, PrunedPartitionList partsList, SplitSample splitSample) {
+ this.table = table;
this.partsList = partsList;
this.splitSample = splitSample;
}
private FetchWork convertToWork() throws HiveException {
inputs.clear();
- if (table != null) {
+ if (!table.isPartitioned()) {
inputs.add(new ReadEntity(table));
String path = table.getPath().toString();
FetchWork work = new FetchWork(path, Utilities.getTableDesc(table));
@@ -236,6 +265,56 @@ public class SimpleFetchOptimizer implem
pctx.getSemanticInputs().addAll(inputs);
return replaceFSwithLS(fileSink, work.getSerializationNullFormat());
}
+
+ private long getInputLength(ParseContext pctx, long remaining) throws Exception {
+ if (splitSample != null && splitSample.getTotalLength() != null) {
+ return splitSample.getTotalLength();
+ }
+ long length = calculateLength(pctx, remaining);
+ if (splitSample != null) {
+ return splitSample.getTargetSize(length);
+ }
+ return length;
+ }
+
+ private long calculateLength(ParseContext pctx, long remaining) throws Exception {
+ JobConf jobConf = new JobConf(pctx.getConf());
+ Utilities.setColumnNameList(jobConf, scanOp, true);
+ Utilities.setColumnTypeList(jobConf, scanOp, true);
+ HiveStorageHandler handler = table.getStorageHandler();
+ if (handler instanceof InputEstimator) {
+ InputEstimator estimator = (InputEstimator) handler;
+ TableDesc tableDesc = Utilities.getTableDesc(table);
+ PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
+ Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
+ return estimator.estimate(jobConf, scanOp, remaining).getTotalLength();
+ }
+ if (table.isNonNative()) {
+ return 0; // nothing can be done
+ }
+ if (!table.isPartitioned()) {
+ return getFileLength(jobConf, table.getPath(), table.getInputFormatClass());
+ }
+ long total = 0;
+ for (Partition partition : partsList.getNotDeniedPartns()) {
+ Path path = partition.getPartitionPath();
+ total += getFileLength(jobConf, path, partition.getInputFormatClass());
+ }
+ return total;
+ }
+
+ // from Utilities.getInputSummary()
+ private long getFileLength(JobConf conf, Path path, Class<? extends InputFormat> clazz)
+ throws IOException {
+ ContentSummary summary;
+ if (ContentSummaryInputFormat.class.isAssignableFrom(clazz)) {
+ InputFormat input = HiveInputFormat.getInputFormatFromCache(clazz, conf);
+ summary = ((ContentSummaryInputFormat)input).getContentSummary(path, conf);
+ } else {
+ summary = path.getFileSystem(conf).getContentSummary(path);
+ }
+ return summary.getLength();
+ }
}
public static ListSinkOperator replaceFSwithLS(Operator<?> fileSink, String nullFormat) {
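
Not part of the diff itself, but the shape of the new checkThreshold()/getInputLength() logic is: read the fetch-conversion threshold, treat a negative value as "no limit", and otherwise subtract the estimated input size (a ContentSummary per path, a storage-handler estimate, or the split-sample target) from the threshold, rejecting the conversion once it goes negative. A simplified, self-contained sketch under those assumptions; the configuration key below is assumed, not read from this revision:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class FetchThresholdSketch {

  // Mirrors the spirit of SimpleFetchOptimizer.checkThreshold():
  // a negative threshold disables the size check entirely.
  static boolean underThreshold(Configuration conf, List<Path> inputPaths) throws Exception {
    long threshold = conf.getLong("hive.fetch.task.conversion.threshold", -1L);  // assumed key
    if (threshold < 0) {
      return true;
    }
    long remaining = threshold;
    for (Path path : inputPaths) {
      remaining -= path.getFileSystem(conf).getContentSummary(path).getLength();
      if (remaining < 0) {
        return false;  // inputs are too large for a single fetch task
      }
    }
    return true;
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setLong("hive.fetch.task.conversion.threshold", 1024L * 1024L);  // 1 MB for the demo
    List<Path> inputs = Arrays.asList(new Path("/tmp"));                  // placeholder input
    System.out.println("convert to fetch task: " + underThreshold(conf, inputs));
  }
}
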
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Tue Nov 12 18:23:05 2013
@@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.Stack;
import org.apache.commons.logging.Log;
@@ -216,13 +217,15 @@ public class StatsOptimizer implements T
String colName = desc.getColumn();
StatType type = getType(desc.getTypeString());
if(!tbl.isPartitioned()) {
- rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
+ if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+ Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+ return null;
+ }
+ rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
if (rowCnt < 1) {
Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
return null;
}
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
tbl.getDbName(),tbl.getTableName(),colName).
getStatsObjIterator().next().getStatsData();
@@ -236,18 +239,20 @@ public class StatsOptimizer implements T
}
} else {
for (Partition part : hive.getAllPartitionsOf(tbl)) {
- Long partRowCnt = Long.parseLong(part.getParameters()
+ if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+ Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+ return null;
+ }
+ Long partRowCnt = Long.parseLong(part.getParameters()
.get(StatsSetupConst.ROW_COUNT));
if (partRowCnt < 1) {
Log.debug("Partition doesn't have upto date stats " + part.getSpec());
return null;
}
rowCnt += partRowCnt;
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
- tbl.getDbName(), tbl.getTableName(),part.getName(), colName)
- .getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(), tbl.getTableName(),part.getName(), colName)
+ .getStatsObjIterator().next().getStatsData();
Long nullCnt = getNullcountFor(type, statData);
if(nullCnt == null) {
Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
@@ -268,11 +273,13 @@ public class StatsOptimizer implements T
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if(!tbl.isPartitioned()) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
+ if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+ Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+ return null;
+ }
ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
- tbl.getDbName(),tbl.getTableName(),colName).
- getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(),tbl.getTableName(),colName).
+ getStatsObjIterator().next().getStatsData();
switch (type) {
case Integeral:
oneRow.add(statData.getLongStats().getHighValue());
@@ -291,16 +298,17 @@ public class StatsOptimizer implements T
return null;
}
} else {
- List<String> parts = hive.getMSC().listPartitionNames(tbl.getDbName(),
- tbl.getTableName(), (short)-1);
+ Set<Partition> parts = hive.getAllPartitionsOf(tbl);
switch(type) {
case Integeral: {
long maxVal = Long.MIN_VALUE;
- for (String part : parts) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
- ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
- tbl.getDbName(),tbl.getTableName(), part, colName).
+ for (Partition part : parts) {
+ if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+ Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+ return null;
+ }
+ ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
+ tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
getStatsObjIterator().next().getStatsData();
maxVal = Math.max(maxVal,statData.getLongStats().getHighValue());
}
@@ -311,12 +319,14 @@ public class StatsOptimizer implements T
}
case Double: {
double maxVal = Double.MIN_VALUE;
- for (String part : parts) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
+ for (Partition part : parts) {
+ if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+ Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+ return null;
+ }
ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
- tbl.getDbName(),tbl.getTableName(), part, colName).
- getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+ getStatsObjIterator().next().getStatsData();
maxVal = Math.max(maxVal,statData.getDoubleStats().getHighValue());
}
oneRow.add(maxVal);
@@ -336,11 +346,13 @@ public class StatsOptimizer implements T
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if (!tbl.isPartitioned()) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
+ if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+ Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+ return null;
+ }
ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
- tbl.getDbName(),tbl.getTableName(),colName).
- getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(),tbl.getTableName(),colName).
+ getStatsObjIterator().next().getStatsData();
switch (type) {
case Integeral:
oneRow.add(statData.getLongStats().getLowValue());
@@ -358,17 +370,18 @@ public class StatsOptimizer implements T
return null;
}
} else {
- List<String> parts = hive.getMSC().listPartitionNames(tbl.getDbName(),
- tbl.getTableName(), (short)-1);
+ Set<Partition> parts = hive.getAllPartitionsOf(tbl);
switch(type) {
case Integeral: {
long minVal = Long.MAX_VALUE;
- for (String part : parts) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
+ for (Partition part : parts) {
+ if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+ Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+ return null;
+ }
ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
- tbl.getDbName(),tbl.getTableName(), part, colName).
- getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+ getStatsObjIterator().next().getStatsData();
minVal = Math.min(minVal,statData.getLongStats().getLowValue());
}
oneRow.add(minVal);
@@ -378,12 +391,14 @@ public class StatsOptimizer implements T
}
case Double: {
double minVal = Double.MAX_VALUE;
- for (String part : parts) {
- //TODO: After HIVE-3777 use the property to figure out if following
- // stats is fresh or not.
+ for (Partition part : parts) {
+ if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+ Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+ return null;
+ }
ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
- tbl.getDbName(),tbl.getTableName(), part, colName).
- getStatsObjIterator().next().getStatsData();
+ tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+ getStatsObjIterator().next().getStatsData();
minVal = Math.min(minVal,statData.getDoubleStats().getLowValue());
}
oneRow.add(minVal);
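
For illustration only: StatsOptimizer now refuses to answer a query from metastore statistics unless the table's or partition's parameter map says those stats are current (replacing the old HIVE-3777 TODOs). A stand-alone sketch of that guard, with a hypothetical parameter key standing in for whatever StatsSetupConst.areStatsUptoDate() actually checks:

import java.util.HashMap;
import java.util.Map;

public class StatsFreshnessSketch {

  // Hypothetical stand-in for StatsSetupConst.areStatsUptoDate(params).
  static boolean areStatsUpToDate(Map<String, String> params) {
    return "true".equalsIgnoreCase(params.get("COLUMN_STATS_ACCURATE"));  // assumed key name
  }

  // Returns the row count from the parameter map, or null if the stats cannot be
  // trusted - mirroring how StatsOptimizer now gives up and lets the query run
  // normally instead of answering from stale metadata.
  static Long rowCountFromStats(Map<String, String> params) {
    if (!areStatsUpToDate(params)) {
      return null;
    }
    long rowCnt = Long.parseLong(params.get("numRows"));  // assumed key name
    return rowCnt < 1 ? null : rowCnt;
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put("numRows", "42");
    System.out.println(rowCountFromStats(params));  // null: accuracy flag missing
    params.put("COLUMN_STATS_ACCURATE", "true");
    System.out.println(rowCountFromStats(params));  // 42
  }
}
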
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java Tue Nov 12 18:23:05 2013
@@ -397,24 +397,31 @@ public class CorrelationOptimizer implem
}
}
if (current instanceof JoinOperator) {
- LinkedHashSet<ReduceSinkOperator> correlatedRsOps =
- new LinkedHashSet<ReduceSinkOperator>();
+ boolean isCorrelated = true;
+ int expectedNumCorrelatedRsops = current.getParentOperators().size();
+ LinkedHashSet<ReduceSinkOperator> correlatedRsops = null;
for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) {
Set<String> tableNames =
pCtx.getOpParseCtx().get(parent).getRowResolver().getTableNames();
for (String tbl : tableNames) {
if (tableNeedToCheck.contains(tbl)) {
- correlatedRsOps.addAll(findCorrelatedReduceSinkOperators(
- current, backtrackedKeyCols, backtrackedPartitionCols, childRSOrder,
- parent, correlation));
+ correlatedRsops = findCorrelatedReduceSinkOperators(current,
+ backtrackedKeyCols, backtrackedPartitionCols,
+ childRSOrder, parent, correlation);
+ if (correlatedRsops.size() != expectedNumCorrelatedRsops) {
+ isCorrelated = false;
+ }
}
}
+ if (!isCorrelated) {
+ break;
+ }
}
// If current is a JoinOperator, we will stop traversing the tree
// when any parent ReduceSinkOperator of this JoinOperator is
// not considered a correlated ReduceSinkOperator.
- if (correlatedRsOps.size() == current.getParentOperators().size()) {
- correlatedReduceSinkOperators.addAll(correlatedRsOps);
+ if (isCorrelated && correlatedRsops != null) {
+ correlatedReduceSinkOperators.addAll(correlatedRsops);
} else {
correlatedReduceSinkOperators.clear();
}
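
Schematically, the rewritten join handling checks each parent branch on its own: as soon as one parent fails to yield the expected number of correlated ReduceSink operators, the correlation is abandoned, instead of only comparing set sizes after unioning every branch. A rough sketch of that control flow with operators replaced by plain strings (hypothetical names throughout):

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class CorrelationCheckSketch {

  // Stand-in for findCorrelatedReduceSinkOperators(): pretends every parent
  // except "uncorrelated-branch" contributes the expected operators.
  static Set<String> findCorrelatedRsOps(String parent, int expected) {
    Set<String> result = new LinkedHashSet<>();
    if (!parent.equals("uncorrelated-branch")) {
      for (int i = 0; i < expected; i++) {
        result.add(parent + "-RS" + i);
      }
    }
    return result;
  }

  static Set<String> correlatedOpsForJoin(List<String> parents) {
    int expected = parents.size();
    Set<String> all = new LinkedHashSet<>();
    for (String parent : parents) {
      Set<String> correlated = findCorrelatedRsOps(parent, expected);
      if (correlated.size() != expected) {
        return new LinkedHashSet<>();  // one bad branch kills the whole correlation
      }
      all.addAll(correlated);
    }
    return all;
  }

  public static void main(String[] args) {
    System.out.println(correlatedOpsForJoin(Arrays.asList("left", "right")));
    System.out.println(correlatedOpsForJoin(Arrays.asList("left", "uncorrelated-branch")));
  }
}
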
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Tue Nov 12 18:23:05 2013
@@ -84,6 +84,9 @@ public class PhysicalOptimizer {
if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
resolvers.add(new Vectorizer());
}
+ if (!"none".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
+ resolvers.add(new StageIDsRearranger());
+ }
}
/**
@@ -98,5 +101,4 @@ public class PhysicalOptimizer {
}
return pctx;
}
-
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Nov 12 18:23:05 2013
@@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -174,6 +175,7 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFConcat.class);
supportedGenericUDFs.add(GenericUDFAbs.class);
supportedGenericUDFs.add(GenericUDFBetween.class);
+ supportedGenericUDFs.add(GenericUDFIn.class);
// For type casts
supportedGenericUDFs.add(UDFToLong.class);
@@ -364,13 +366,15 @@ public class Vectorizer implements Physi
Operator<? extends OperatorDesc> op = mWork.getAliasToWork().get(alias);
if (op == tsOp) {
fileKey = onefile;
+ if (vContext == null) {
+ vContext = getVectorizationContext(tsOp, physicalContext);
+ }
+ vContext.setFileKey(fileKey);
+ vectorizationContexts.put(fileKey, vContext);
break;
}
}
}
- vContext = getVectorizationContext(tsOp, physicalContext);
- vContext.setFileKey(fileKey);
- vectorizationContexts.put(fileKey, vContext);
vContextsByTSOp.put(tsOp, vContext);
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Tue Nov 12 18:23:05 2013
@@ -32,7 +32,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
public class ASTNode extends CommonTree implements Node,Serializable {
private static final long serialVersionUID = 1L;
- private ASTNodeOrigin origin;
+ private transient ASTNodeOrigin origin;
public ASTNode() {
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
+import java.sql.Date;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -66,6 +69,7 @@ import org.apache.hadoop.hive.ql.plan.Li
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -73,6 +77,9 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.annotations.VisibleForTesting;
/**
* BaseSemanticAnalyzer.
@@ -80,6 +87,7 @@ import org.apache.hadoop.mapred.TextInpu
*/
@SuppressWarnings("deprecation")
public abstract class BaseSemanticAnalyzer {
+ private static final Log STATIC_LOG = LogFactory.getLog(BaseSemanticAnalyzer.class.getName());
protected final Hive db;
protected final HiveConf conf;
protected List<Task<? extends Serializable>> rootTasks;
@@ -707,10 +715,8 @@ public abstract class BaseSemanticAnalyz
this(db, conf, ast, true, false);
}
- public tableSpec(Hive db, HiveConf conf, ASTNode ast,
- boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec)
- throws SemanticException {
-
+ public tableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
+ boolean allowPartialPartitionsSpec) throws SemanticException {
assert (ast.getToken().getType() == HiveParser.TOK_TAB
|| ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION
|| ast.getToken().getType() == HiveParser.TOK_TABTYPE
@@ -761,7 +767,7 @@ public abstract class BaseSemanticAnalyz
partSpec.put(colName, val);
}
- // check if the columns specified in the partition() clause are actually partition columns
+ // check if the columns, as well as value types in the partition() clause are valid
validatePartSpec(tableHandle, partSpec, ast, conf);
// check if the partition spec is valid
@@ -840,7 +846,6 @@ public abstract class BaseSemanticAnalyz
return tableHandle.toString();
}
}
-
}
/**
@@ -1156,52 +1161,86 @@ public abstract class BaseSemanticAnalyz
}
}
- public static void validatePartSpec(Table tbl,
- Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
-
+ public static void validatePartSpec(Table tbl, Map<String, String> partSpec,
+ ASTNode astNode, HiveConf conf) throws SemanticException {
Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
Utilities.validatePartSpec(tbl, partSpec);
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+ if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+ return;
+ }
+
+ try {
+ getPartExprNodeDesc(astNode, astExprNodeMap);
+ } catch (HiveException e) {
+ return;
+ }
+ List<FieldSchema> parts = tbl.getPartitionKeys();
+ Map<String, String> partCols = new HashMap<String, String>(parts.size());
+ for (FieldSchema col : parts) {
+ partCols.put(col.getName(), col.getType().toLowerCase());
+ }
+ for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+ String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+ if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+ astKeyName = stripIdentifierQuotes(astKeyName);
+ }
+ String colType = partCols.get(astKeyName);
+ ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+ TypeInfo expectedType =
+ TypeInfoUtils.getTypeInfoFromTypeString(colType);
+ ObjectInspector outputOI =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+ Object value = null;
+ String colSpec = partSpec.get(astKeyName);
try {
- getPartExprNodeDesc(astNode, astExprNodeMap);
+ value =
+ ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+ evaluate(colSpec);
} catch (HiveException e) {
- return;
+ throw new SemanticException(e);
}
- List<FieldSchema> parts = tbl.getPartitionKeys();
- Map<String, String> partCols = new HashMap<String, String>(parts.size());
- for (FieldSchema col : parts) {
- partCols.put(col.getName(), col.getType().toLowerCase());
- }
- for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
-
- String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
- if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
- astKeyName = stripIdentifierQuotes(astKeyName);
- }
- String colType = partCols.get(astKeyName);
- ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
-
- TypeInfo expectedType =
- TypeInfoUtils.getTypeInfoFromTypeString(colType);
- ObjectInspector outputOI =
- TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
- Object value = null;
- try {
- value =
- ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
- evaluate(partSpec.get(astKeyName));
- } catch (HiveException e) {
- throw new SemanticException(e);
- }
- Object convertedValue =
- ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
- if (convertedValue == null) {
- throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
- inputOI.getTypeName(), outputOI.getTypeName()));
- }
+ Object convertedValue =
+ ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+ if (convertedValue == null) {
+ throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+ inputOI.getTypeName(), outputOI.getTypeName()));
}
+
+ normalizeColSpec(partSpec, astKeyName, colType, colSpec, convertedValue);
+ }
+ }
+
+ @VisibleForTesting
+ static void normalizeColSpec(Map<String, String> partSpec, String colName,
+ String colType, String originalColSpec, Object colValue) throws SemanticException {
+ if (colValue == null) return; // nothing to do with nulls
+ String normalizedColSpec = originalColSpec;
+ if (colType.equals(serdeConstants.DATE_TYPE_NAME)) {
+ normalizedColSpec = normalizeDateCol(colValue, originalColSpec);
+ }
+ if (!normalizedColSpec.equals(originalColSpec)) {
+ STATIC_LOG.warn("Normalizing partition spec - " + colName + " from "
+ + originalColSpec + " to " + normalizedColSpec);
+ partSpec.put(colName, normalizedColSpec);
+ }
+ }
+
+ /** A fixed date format to be used for hive partition column values. */
+ private static final DateFormat partitionDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+ private static String normalizeDateCol(
+ Object colValue, String originalColSpec) throws SemanticException {
+ Date value;
+ if (colValue instanceof DateWritable) {
+ value = ((DateWritable) colValue).get();
+ } else if (colValue instanceof Date) {
+ value = (Date) colValue;
+ } else {
+ throw new SemanticException("Unexpected date type " + colValue.getClass());
}
+ return partitionDateFormat.format(value);
}
}
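
The new normalizeColSpec()/normalizeDateCol() path exists so that equivalent date literals (for example '2013-1-2' and '2013-01-02') collapse to a single canonical partition value. A self-contained sketch of the same normalization using only the JDK:

import java.sql.Date;
import java.text.SimpleDateFormat;

public class DateColSpecSketch {

  // Fixed format matching the partitionDateFormat used in BaseSemanticAnalyzer.
  private static final SimpleDateFormat PARTITION_DATE_FORMAT =
      new SimpleDateFormat("yyyy-MM-dd");

  static String normalizeDateColSpec(String originalColSpec) {
    // java.sql.Date.valueOf accepts yyyy-[m]m-[d]d, so "2013-1-2" parses fine...
    Date value = Date.valueOf(originalColSpec);
    // ...and formatting it back yields the canonical zero-padded spelling.
    return PARTITION_DATE_FORMAT.format(value);
  }

  public static void main(String[] args) {
    String original = "2013-1-2";
    String normalized = normalizeDateColSpec(original);
    if (!normalized.equals(original)) {
      System.out.println("Normalizing partition spec from " + original + " to " + normalized);
    }
    // prints: Normalizing partition spec from 2013-1-2 to 2013-01-02
  }
}
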
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -127,6 +127,10 @@ import org.apache.hadoop.hive.ql.securit
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.mapred.InputFormat;
@@ -150,6 +154,7 @@ public class DDLSemanticAnalyzer extends
TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME);
+ TokenToTypeName.put(HiveParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
@@ -168,6 +173,10 @@ public class DDLSemanticAnalyzer extends
}
switch (token) {
+ case HiveParser.TOK_CHAR:
+ CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node);
+ typeName = charTypeInfo.getQualifiedName();
+ break;
case HiveParser.TOK_VARCHAR:
VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node);
typeName = varcharTypeInfo.getQualifiedName();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -19,7 +19,7 @@
package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
-import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import org.apache.hadoop.fs.Path;
@@ -60,21 +60,16 @@ public class ExplainSemanticAnalyzer ext
ctx.setExplainLogical(logical);
// Create a semantic analyzer for the query
- BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast
- .getChild(0));
- sem.analyze((ASTNode) ast.getChild(0), ctx);
+ ASTNode input = (ASTNode) ast.getChild(0);
+ BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, input);
+ sem.analyze(input, ctx);
sem.validate();
ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
List<Task<? extends Serializable>> tasks = sem.getRootTasks();
Task<? extends Serializable> fetchTask = sem.getFetchTask();
if (tasks == null) {
- if (fetchTask != null) {
- tasks = new ArrayList<Task<? extends Serializable>>();
- tasks.add(fetchTask);
- }
- } else if (fetchTask != null) {
- tasks.add(fetchTask);
+ tasks = Collections.emptyList();
}
ParseContext pCtx = null;
@@ -82,17 +77,21 @@ public class ExplainSemanticAnalyzer ext
pCtx = ((SemanticAnalyzer)sem).getParseContext();
}
- Task<? extends Serializable> explTask =
- TaskFactory.get(new ExplainWork(ctx.getResFile().toString(),
+ ExplainWork work = new ExplainWork(ctx.getResFile().toString(),
pCtx,
tasks,
- ((ASTNode) ast.getChild(0)).toStringTree(),
+ fetchTask,
+ input.toStringTree(),
sem.getInputs(),
extended,
formatted,
dependency,
- logical),
- conf);
+ logical);
+
+ work.setAppendTaskType(
+ HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
+
+ Task<? extends Serializable> explTask = TaskFactory.get(work, conf);
fieldList = explTask.getResultSchema();
rootTasks.add(explTask);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Tue Nov 12 18:23:05 2013
@@ -105,6 +105,7 @@ KW_DATETIME: 'DATETIME';
KW_TIMESTAMP: 'TIMESTAMP';
KW_DECIMAL: 'DECIMAL';
KW_STRING: 'STRING';
+KW_CHAR: 'CHAR';
KW_VARCHAR: 'VARCHAR';
KW_ARRAY: 'ARRAY';
KW_STRUCT: 'STRUCT';
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Tue Nov 12 18:23:05 2013
@@ -110,6 +110,7 @@ TOK_DATELITERAL;
TOK_DATETIME;
TOK_TIMESTAMP;
TOK_STRING;
+TOK_CHAR;
TOK_VARCHAR;
TOK_BINARY;
TOK_DECIMAL;
@@ -1781,6 +1782,7 @@ primitiveType
| KW_BINARY -> TOK_BINARY
| KW_DECIMAL (LPAREN prec=Number (COMMA scale=Number)? RPAREN)? -> ^(TOK_DECIMAL $prec? $scale?)
| KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length)
+ | KW_CHAR LPAREN length=Number RPAREN -> ^(TOK_CHAR $length)
;
listType
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Tue Nov 12 18:23:05 2013
@@ -26,6 +26,9 @@ import org.apache.hadoop.hive.common.typ
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -131,6 +134,16 @@ public final class ParseUtils {
return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr));
}
+ public static CharTypeInfo getCharTypeInfo(ASTNode node)
+ throws SemanticException {
+ if (node.getChildCount() != 1) {
+ throw new SemanticException("Bad params for type char");
+ }
+
+ String lengthStr = node.getChild(0).getText();
+ return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr));
+ }
+
static int getIndex(String[] list, String elem) {
for(int i=0; i < list.length; i++) {
if (list[i].toLowerCase().equals(elem)) {
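
Paired with the new KW_CHAR / TOK_CHAR grammar rules above, this helper turns the single length child of a TOK_CHAR node into a CharTypeInfo. A small, self-contained sketch of what it resolves to; the class name is hypothetical and the literal 10 stands in for node.getChild(0).getText() from a CHAR(10) column definition:

    import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class CharTypeInfoSketch {
      public static void main(String[] args) {
        // Same factory call the new helper makes once it has read the length child.
        CharTypeInfo ti = TypeInfoFactory.getCharTypeInfo(10);
        System.out.println(ti.getTypeName());  // expected: char(10)
        System.out.println(ti.getLength());    // expected: 10
      }
    }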
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -166,6 +167,8 @@ import org.apache.hadoop.hive.serde2.laz
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -806,6 +809,7 @@ public class SemanticAnalyzer extends Ba
String currentDatabase = SessionState.get().getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name);
+ // TODO: is this supposed to fall thru?
case HiveParser.TOK_DESTINATION:
ctx_1.dest = "insclause-" + ctx_1.nextNum;
@@ -967,17 +971,17 @@ public class SemanticAnalyzer extends Ba
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
+ skipRecursion = false;
+ break;
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
-
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
-
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap<String, String> partition = new HashMap<String, String>();
@@ -991,25 +995,30 @@ public class SemanticAnalyzer extends Ba
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
- if (childCount == partition.size()) {
- try {
- Table table = db.getTable(tableName);
- Partition parMetaData = db.getPartition(table, partition, false);
- // Check partition exists if it exists skip the overwrite
- if (parMetaData != null) {
- phase1Result = false;
- skipRecursion = true;
- LOG.info("Partition already exists so insert into overwrite " +
- "skipped for partition : " + parMetaData.toString());
- break;
- }
- } catch (HiveException e) {
- LOG.info("Error while getting metadata : ", e);
- }
- } else {
+ if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
+ Table table = null;
+ try {
+ table = db.getTable(tableName);
+ } catch (HiveException ex) {
+ throw new SemanticException(ex);
+ }
+ try {
+ Partition parMetaData = db.getPartition(table, partition, false);
+ // Check partition exists if it exists skip the overwrite
+ if (parMetaData != null) {
+ phase1Result = false;
+ skipRecursion = true;
+ LOG.info("Partition already exists so insert into overwrite " +
+ "skipped for partition : " + parMetaData.toString());
+ break;
+ }
+ } catch (HiveException e) {
+ LOG.info("Error while getting metadata : ", e);
+ }
+ validatePartSpec(table, partition, (ASTNode)tab, conf);
}
skipRecursion = false;
break;
@@ -5705,7 +5714,9 @@ public class SemanticAnalyzer extends Ba
colNames.add(inputCols.get(i).getInternalName());
colOIs[i] = inputCols.get(i).getObjectInspector();
}
- StructObjectInspector outputOI = genericUDTF.initialize(colOIs);
+ StandardStructObjectInspector rowOI =
+ ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
+ StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
int numUdtfCols = outputOI.getAllStructFieldRefs().size();
if (colAliases.isEmpty()) {
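
The UDTF hand-off now builds a row-shaped StandardStructObjectInspector from the selected columns and passes that to initialize, rather than a bare ObjectInspector[]. A sketch of the new call shape; the two columns and the genericUDTF instance are illustrative assumptions, while the factory calls are the ones used in the hunk above:

    List<String> colNames = Arrays.asList("c0", "c1");
    List<ObjectInspector> colOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs);
    // New shape: the UDTF sees named input fields instead of a positional array.
    StructObjectInspector outputOI = genericUDTF.initialize(rowOI);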
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Tue Nov 12 18:23:05 2013
@@ -32,6 +32,7 @@ import java.util.Stack;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
@@ -62,6 +63,8 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
@@ -270,8 +273,16 @@ public final class TypeCheckProcFactory
0, expr.getText().length() - 1));
} else if (expr.getText().endsWith("BD")) {
// Literal decimal
- return new ExprNodeConstantDesc(TypeInfoFactory.decimalTypeInfo,
- expr.getText().substring(0, expr.getText().length() - 2));
+ String strVal = expr.getText().substring(0, expr.getText().length() - 2);
+ HiveDecimal hd = HiveDecimal.create(strVal);
+ int prec = 1;
+ int scale = 0;
+ if (hd != null) {
+ prec = hd.precision();
+ scale = hd.scale();
+ }
+ DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale);
+ return new ExprNodeConstantDesc(typeInfo, strVal);
} else {
v = Double.valueOf(expr.getText());
v = Long.valueOf(expr.getText());
@@ -573,6 +584,8 @@ public final class TypeCheckProcFactory
serdeConstants.DOUBLE_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_STRING,
serdeConstants.STRING_TYPE_NAME);
+ conversionFunctionTextHashMap.put(HiveParser.TOK_CHAR,
+ serdeConstants.CHAR_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR,
serdeConstants.VARCHAR_TYPE_NAME);
conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY,
@@ -796,6 +809,13 @@ public final class TypeCheckProcFactory
if (isFunction) {
ASTNode funcNameNode = (ASTNode)expr.getChild(0);
switch (funcNameNode.getType()) {
+ case HiveParser.TOK_CHAR:
+ // Add type params
+ CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(funcNameNode);
+ if (genericUDF != null) {
+ ((SettableUDF)genericUDF).setTypeInfo(charTypeInfo);
+ }
+ break;
case HiveParser.TOK_VARCHAR:
VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode);
if (genericUDF != null) {
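
The BD literal handling above now derives precision and scale from the parsed value instead of using the generic decimal type. A worked, self-contained example of that derivation; the class name and the literal are illustrative, and the BD suffix is assumed to be already stripped, as in the hunk:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class DecimalLiteralSketch {
      public static void main(String[] args) {
        String strVal = "12.345";                       // from the literal 12.345BD
        HiveDecimal hd = HiveDecimal.create(strVal);
        int prec = (hd != null) ? hd.precision() : 1;   // 5 significant digits
        int scale = (hd != null) ? hd.scale() : 0;      // 3 digits after the point
        DecimalTypeInfo ti = TypeInfoFactory.getDecimalTypeInfo(prec, scale);
        System.out.println(ti.getTypeName());           // expected: decimal(5,3)
      }
    }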
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java Tue Nov 12 18:23:05 2013
@@ -36,6 +36,7 @@ public class ExplainWork implements Seri
private String resFile;
private ArrayList<Task<? extends Serializable>> rootTasks;
+ private Task<? extends Serializable> fetchTask;
private String astStringTree;
private HashSet<ReadEntity> inputs;
private ParseContext pCtx;
@@ -45,6 +46,8 @@ public class ExplainWork implements Seri
boolean dependency;
boolean logical;
+ boolean appendTaskType;
+
public ExplainWork() {
}
@@ -52,6 +55,7 @@ public class ExplainWork implements Seri
public ExplainWork(String resFile,
ParseContext pCtx,
List<Task<? extends Serializable>> rootTasks,
+ Task<? extends Serializable> fetchTask,
String astStringTree,
HashSet<ReadEntity> inputs,
boolean extended,
@@ -60,6 +64,7 @@ public class ExplainWork implements Seri
boolean logical) {
this.resFile = resFile;
this.rootTasks = new ArrayList<Task<? extends Serializable>>(rootTasks);
+ this.fetchTask = fetchTask;
this.astStringTree = astStringTree;
this.inputs = inputs;
this.extended = extended;
@@ -85,6 +90,14 @@ public class ExplainWork implements Seri
this.rootTasks = rootTasks;
}
+ public Task<? extends Serializable> getFetchTask() {
+ return fetchTask;
+ }
+
+ public void setFetchTask(Task<? extends Serializable> fetchTask) {
+ this.fetchTask = fetchTask;
+ }
+
public String getAstStringTree() {
return astStringTree;
}
@@ -141,4 +154,11 @@ public class ExplainWork implements Seri
this.logical = logical;
}
+ public boolean isAppendTaskType() {
+ return appendTaskType;
+ }
+
+ public void setAppendTaskType(boolean appendTaskType) {
+ this.appendTaskType = appendTaskType;
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java Tue Nov 12 18:23:05 2013
@@ -62,7 +62,7 @@ public class UDFOPDivide extends UDF {
public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) {
// LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":"
// + b);
- if ((a == null) || (b == null)) {
+ if (a == null || b == null || b.get() == 0.0) {
return null;
}
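
The added b.get() == 0.0 check makes a zero divisor produce SQL NULL rather than the Infinity/NaN a raw double division would yield. A plain-Java behavioural sketch of the guard, with the Writable wrappers of the real method simplified to boxed doubles:

    static Double divide(Double a, Double b) {
      if (a == null || b == null || b == 0.0) {
        return null;        // 7.0 / 0.0 would otherwise evaluate to Infinity
      }
      return a / b;
    }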
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Tue Nov 12 18:23:05 2013
@@ -83,6 +83,7 @@ public class GenericUDAFComputeStats ext
case DOUBLE:
return new GenericUDAFDoubleStatsEvaluator();
case STRING:
+ case CHAR:
case VARCHAR:
return new GenericUDAFStringStatsEvaluator();
case BINARY:
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java Tue Nov 12 18:23:05 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BytesWritable;
/**
@@ -62,7 +63,9 @@ public class GenericUDFConcat extends Ge
// Loop through all the inputs to determine the appropriate return type/length.
// Return type:
+ // All CHAR inputs: return CHAR
// All VARCHAR inputs: return VARCHAR
+ // All CHAR/VARCHAR inputs: return VARCHAR
// All BINARY inputs: return BINARY
// Otherwise return STRING
argumentOIs = arguments;
@@ -88,10 +91,14 @@ public class GenericUDFConcat extends Ge
returnType = PrimitiveCategory.STRING;
}
break;
+ case CHAR:
case VARCHAR:
if (!fixedLengthReturnValue) {
returnType = PrimitiveCategory.STRING;
}
+ if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
+ returnType = PrimitiveCategory.VARCHAR;
+ }
break;
default:
returnType = PrimitiveCategory.STRING;
@@ -104,8 +111,10 @@ public class GenericUDFConcat extends Ge
// max length for the char/varchar, then the return type reverts to string.
if (fixedLengthReturnValue) {
returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
- if (returnType == PrimitiveCategory.VARCHAR
- && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) {
+ if ((returnType == PrimitiveCategory.VARCHAR
+ && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH)
+ || (returnType == PrimitiveCategory.CHAR
+ && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
returnType = PrimitiveCategory.STRING;
fixedLengthReturnValue = false;
}
@@ -119,11 +128,15 @@ public class GenericUDFConcat extends Ge
// treat all inputs as string, the return value will be converted to the appropriate type.
createStringConverters();
returnHelper = new GenericUDFUtils.StringHelper(returnType);
+ BaseCharTypeInfo typeInfo;
switch (returnType) {
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case CHAR:
+ typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
case VARCHAR:
- VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
+ typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
default:
throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
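
Under the return-type rules listed in the comment near the top of this file, concat(char(5), char(3)) yields char(8), mixing char and varchar yields varchar(8), and any string input or an over-length sum falls back to string. A minimal, assumed harness (class name hypothetical, exception handling reduced to a throws clause) exercising the all-CHAR case with the same factory calls as the hunk:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class ConcatReturnTypeSketch {
      public static void main(String[] args) throws UDFArgumentException {
        ObjectInspector[] charArgs = {
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                TypeInfoFactory.getCharTypeInfo(5)),
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                TypeInfoFactory.getCharTypeInfo(3))
        };
        // All-CHAR inputs: lengths are summed and the result stays CHAR.
        System.out.println(new GenericUDFConcat().initialize(charArgs).getTypeName());
        // expected: char(8)
      }
    }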
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java Tue Nov 12 18:23:05 2013
@@ -31,8 +31,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
/**
* UDFLower.
@@ -65,11 +65,19 @@ public class GenericUDFLower extends Gen
stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
ObjectInspector outputOI = null;
+ BaseCharTypeInfo typeInfo;
switch (inputType) {
+ case CHAR:
+ // return type should have same length as the input.
+ returnType = inputType;
+ typeInfo = TypeInfoFactory.getCharTypeInfo(
+ GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
+ outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+ break;
case VARCHAR:
// return type should have same length as the input.
returnType = inputType;
- VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(
+ typeInfo = TypeInfoFactory.getVarcharTypeInfo(
GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
break;
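
For CHAR input, the lower() output inspector mirrors the input's declared length, since lower-casing cannot change it. A small value-level illustration, assuming HiveChar's (String, int) constructor and its getValue/getStrippedValue accessors; the class name is hypothetical:

    import org.apache.hadoop.hive.common.type.HiveChar;

    public class LowerCharSketch {
      public static void main(String[] args) {
        HiveChar in = new HiveChar("AbC", 5);                         // char(5)
        HiveChar out = new HiveChar(in.getValue().toLowerCase(), 5);  // still char(5)
        System.out.println("[" + out.getStrippedValue() + "]");       // expected: [abc]
      }
    }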
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPEqualOrGreaterT
Text t0, t1;
t0 = soi0.getPrimitiveWritableObject(o0);
t1 = soi1.getPrimitiveWritableObject(o1);
- result.set(ShimLoader.getHadoopShims().compareText(t0, t1) >= 0);
+ result.set(t0.compareTo(t1) >= 0);
break;
case COMPARE_INT:
result.set(ioi0.get(o0) >= ioi1.get(o1));
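
Here, and in the three sibling comparison operators below, the Hadoop-shim text comparison is replaced by Text's own compareTo, which compares the underlying bytes lexicographically. A minimal standalone check of the call that now does the work (class name hypothetical):

    import org.apache.hadoop.io.Text;

    public class TextCompareSketch {
      public static void main(String[] args) {
        Text t0 = new Text("apple");
        Text t1 = new Text("banana");
        System.out.println(t0.compareTo(t1) >= 0);   // false: "apple" sorts before "banana"
      }
    }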
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPEqualOrLessThan
Text t0, t1;
t0 = soi0.getPrimitiveWritableObject(o0);
t1 = soi1.getPrimitiveWritableObject(o1);
- result.set(ShimLoader.getHadoopShims().compareText(t0, t1) <= 0);
+ result.set(t0.compareTo(t1) <= 0);
break;
case COMPARE_INT:
result.set(ioi0.get(o0) <= ioi1.get(o1));
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPGreaterThan ext
Text t0, t1;
t0 = soi0.getPrimitiveWritableObject(o0);
t1 = soi1.getPrimitiveWritableObject(o1);
- result.set(ShimLoader.getHadoopShims().compareText(t0, t1) > 0);
+ result.set(t0.compareTo(t1) > 0);
break;
case COMPARE_INT:
result.set(ioi0.get(o0) > ioi1.get(o1));
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java Tue Nov 12 18:23:05 2013
@@ -68,7 +68,7 @@ public class GenericUDFOPLessThan extend
Text t0, t1;
t0 = soi0.getPrimitiveWritableObject(o0);
t1 = soi1.getPrimitiveWritableObject(o1);
- result.set(ShimLoader.getHadoopShims().compareText(t0, t1) < 0);
+ result.set(t0.compareTo(t1) < 0);
break;
case COMPARE_INT:
result.set(ioi0.get(o0) < ioi1.get(o1));