Posted to commits@orc.apache.org by om...@apache.org on 2016/06/30 22:00:14 UTC
[1/7] orc git commit: HIVE-14012. Some ColumnVector subclasses are missing ensureSize.
Repository: orc
Updated Branches:
refs/heads/master d9d529bcf -> 98c0992ed
HIVE-14012. Some ColumnVector subclasses are missing ensureSize.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/e8c0eb54
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/e8c0eb54
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/e8c0eb54
Branch: refs/heads/master
Commit: e8c0eb541c31de500ca0428815347f4595c8e4c1
Parents: 17e14f6
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:23:26 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../exec/vector/IntervalDayTimeColumnVector.java | 19 +++++++++++++++++++
.../ql/exec/vector/TimestampColumnVector.java | 19 +++++++++++++++++++
2 files changed, 38 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/e8c0eb54/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index 39ccea8..c4a6c0f 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -345,4 +345,23 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
buffer.append("null");
}
}
+
+ @Override
+ public void ensureSize(int size, boolean preserveData) {
+ super.ensureSize(size, preserveData);
+ if (size <= totalSeconds.length) return;
+ long[] oldTime = totalSeconds;
+ int[] oldNanos = nanos;
+ totalSeconds = new long[size];
+ nanos = new int[size];
+ if (preserveData) {
+ if (isRepeating) {
+ totalSeconds[0] = oldTime[0];
+ nanos[0] = oldNanos[0];
+ } else {
+ System.arraycopy(oldTime, 0, totalSeconds, 0, oldTime.length);
+ System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/e8c0eb54/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index 228461a..0948d2d 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -397,4 +397,23 @@ public class TimestampColumnVector extends ColumnVector {
buffer.append("null");
}
}
+
+ @Override
+ public void ensureSize(int size, boolean preserveData) {
+ super.ensureSize(size, preserveData);
+ if (size <= time.length) return;
+ long[] oldTime = time;
+ int[] oldNanos = nanos;
+ time = new long[size];
+ nanos = new int[size];
+ if (preserveData) {
+ if (isRepeating) {
+ time[0] = oldTime[0];
+ nanos[0] = oldNanos[0];
+ } else {
+ System.arraycopy(oldTime, 0, time, 0, oldTime.length);
+ System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+ }
+ }
+ }
}
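
Both overrides follow the same pattern: call super.ensureSize so the shared isNull array grows, then, if the request exceeds the current capacity, reallocate the type-specific arrays and copy the old values (only element 0 when the vector is repeating). A minimal caller-side sketch of why the override matters, assuming the storage-api classes above and their default capacity of 1024; the target size of 4096 is only illustrative:

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class EnsureSizeExample {
  public static void main(String[] args) {
    // The column starts with the default capacity (1024 entries).
    TimestampColumnVector col = new TimestampColumnVector();

    int needed = 4096;  // a batch larger than the default capacity
    // Without the override above, the inherited ColumnVector.ensureSize only
    // resizes isNull; time[] and nanos[] keep their old length, and the write
    // below would throw ArrayIndexOutOfBoundsException.
    col.ensureSize(needed, /* preserveData = */ true);

    // With the override, time[] and nanos[] are reallocated and the old
    // contents copied (because preserveData is true), so this write is safe.
    col.set(needed - 1, new Timestamp(0L));
    System.out.println("capacity = " + col.time.length);
  }
}

TreeReaderFactory itself passes preserveData = false (see the HIVE-13872 hunk below), since it resets and refills every column before handing the batch back.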
[7/7] orc git commit: HIVE-13872. Fix cross-product reduce sink serialization.
Posted by om...@apache.org.
HIVE-13872. Fix cross-product reduce sink serialization.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/98c0992e
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/98c0992e
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/98c0992e
Branch: refs/heads/master
Commit: 98c0992ed5fb3c2dde962b405b8fbb71b6d6be85
Parents: 13ee0b3
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:41:48 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../src/java/org/apache/orc/impl/TreeReaderFactory.java | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/98c0992e/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 5901c8c..c4a2093 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -1732,9 +1732,12 @@ public class TreeReaderFactory {
int batchSize) throws IOException {
for(int i=0; i < fields.length &&
(vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
- batch.cols[i].reset();
- batch.cols[i].ensureSize((int) batchSize, false);
- fields[i].nextVector(batch.cols[i], null, batchSize);
+ ColumnVector colVector = batch.cols[i];
+ if (colVector != null) {
+ colVector.reset();
+ colVector.ensureSize((int) batchSize, false);
+ fields[i].nextVector(colVector, null, batchSize);
+ }
}
}
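
The fix stops assuming that every slot up to vectorColumnCount is populated: a batch handed to the reader can carry unmaterialized column slots, and those entries are simply null. A minimal sketch of the same defensive pattern outside the reader, assuming the storage-api vector classes; prepare() is a hypothetical helper, not part of the patch:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class NullColumnGuard {
  // Mirrors the guarded loop in TreeReaderFactory: skip slots that were never
  // materialized instead of dereferencing null.
  static void prepare(VectorizedRowBatch batch, int batchSize) {
    for (int i = 0; i < batch.cols.length; ++i) {
      ColumnVector col = batch.cols[i];
      if (col != null) {
        col.reset();
        col.ensureSize(batchSize, false);
      }
    }
  }

  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();
    // batch.cols[1] stays null, e.g. a column the consumer never reads.
    prepare(batch, 2048);  // no NullPointerException on the empty slot
    System.out.println("prepared " + batch.cols.length + " slots");
  }
}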
[6/7] orc git commit: HIVE-13985. ORC improvements for reducing the file system calls in the task side.
Posted by om...@apache.org.
HIVE-13985. ORC improvements for reducing the file system calls in the task side.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/13ee0b3c
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/13ee0b3c
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/13ee0b3c
Branch: refs/heads/master
Commit: 13ee0b3cdb10585b8a3c0799f8e7685472d8458e
Parents: 047265c
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:38:32 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../src/java/org/apache/orc/FileMetaInfo.java | 64 -----
java/core/src/java/org/apache/orc/OrcFile.java | 32 +--
java/core/src/java/org/apache/orc/OrcUtils.java | 11 +
java/core/src/java/org/apache/orc/Reader.java | 9 +-
.../src/java/org/apache/orc/impl/OrcTail.java | 140 +++++++++++
.../java/org/apache/orc/impl/ReaderImpl.java | 240 ++++++++++---------
proto/orc_proto.proto | 1 +
7 files changed, 304 insertions(+), 193 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/FileMetaInfo.java b/java/core/src/java/org/apache/orc/FileMetaInfo.java
deleted file mode 100644
index d3cac3b..0000000
--- a/java/core/src/java/org/apache/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
- public ByteBuffer footerMetaAndPsBuffer;
- public final String compressionType;
- public final int bufferSize;
- public final int metadataSize;
- public final ByteBuffer footerBuffer;
- public final List<Integer> versionList;
- public final OrcFile.WriterVersion writerVersion;
-
-
- /** Ctor used when reading splits - no version list or full footer buffer. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
- this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion, null);
- }
-
- /** Ctor used when creating file info during init and when getting a new one. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList,
- OrcFile.WriterVersion writerVersion,
- ByteBuffer fullFooterBuffer) {
- this.compressionType = compressionType;
- this.bufferSize = bufferSize;
- this.metadataSize = metadataSize;
- this.footerBuffer = footerBuffer;
- this.versionList = versionList;
- this.writerVersion = writerVersion;
- this.footerMetaAndPsBuffer = fullFooterBuffer;
- }
-
- public OrcFile.WriterVersion getWriterVersion() {
- return writerVersion;
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 7dd7333..ddfa9f7 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.impl.OrcTail;
import org.apache.orc.impl.ReaderImpl;
import org.apache.orc.impl.WriterImpl;
@@ -160,19 +161,17 @@ public class OrcFile {
public static class ReaderOptions {
private final Configuration conf;
private FileSystem filesystem;
- private FileMetaInfo fileMetaInfo; // TODO: this comes from some place.
private long maxLength = Long.MAX_VALUE;
- private FileMetadata fullFileMetadata; // Propagate from LLAP cache.
+ private OrcTail orcTail;
+ // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata interface
+ // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
+ // For now, keeping this around to avoid complex surgery.
+ private FileMetadata fileMetadata;
public ReaderOptions(Configuration conf) {
this.conf = conf;
}
- public ReaderOptions fileMetaInfo(FileMetaInfo info) {
- fileMetaInfo = info;
- return this;
- }
-
public ReaderOptions filesystem(FileSystem fs) {
this.filesystem = fs;
return this;
@@ -183,8 +182,8 @@ public class OrcFile {
return this;
}
- public ReaderOptions fileMetadata(FileMetadata metadata) {
- this.fullFileMetadata = metadata;
+ public ReaderOptions orcTail(OrcTail tail) {
+ this.orcTail = tail;
return this;
}
@@ -196,16 +195,21 @@ public class OrcFile {
return filesystem;
}
- public FileMetaInfo getFileMetaInfo() {
- return fileMetaInfo;
- }
-
public long getMaxLength() {
return maxLength;
}
+ public OrcTail getOrcTail() {
+ return orcTail;
+ }
+
+ public ReaderOptions fileMetadata(final FileMetadata metadata) {
+ fileMetadata = metadata;
+ return this;
+ }
+
public FileMetadata getFileMetadata() {
- return fullFileMetadata;
+ return fileMetadata;
}
}
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index 9dd7504..94493b3 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -17,6 +17,8 @@
*/
package org.apache.orc;
+import org.apache.orc.impl.ReaderImpl;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -525,4 +527,13 @@ public class OrcUtils {
}
throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
}
+
+ public static List<StripeInformation> convertProtoStripesToStripes(
+ List<OrcProto.StripeInformation> stripes) {
+ List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+ for (OrcProto.StripeInformation info : stripes) {
+ result.add(new ReaderImpl.StripeInformationImpl(info));
+ }
+ return result;
+ }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 87f3293..c2d5235 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -138,6 +138,13 @@ public interface Reader {
OrcFile.WriterVersion getWriterVersion();
/**
+ * Get the file tail (footer + postscript)
+ *
+ * @return - file tail
+ */
+ OrcProto.FileTail getFileTail();
+
+ /**
* Options for creating a RecordReader.
*/
public static class Options {
@@ -354,7 +361,7 @@ public interface Reader {
/**
* @return Stripe statistics.
*/
- List<StripeStatistics> getStripeStatistics();
+ List<StripeStatistics> getStripeStatistics() throws IOException;
/**
* @return File statistics, in original protobuf form.
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/OrcTail.java b/java/core/src/java/org/apache/orc/impl/OrcTail.java
new file mode 100644
index 0000000..b5f85fb
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/impl/OrcTail.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import static org.apache.orc.impl.ReaderImpl.extractMetadata;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
+
+// TODO: Make OrcTail implement FileMetadata or Reader interface
+public final class OrcTail {
+ // postscript + footer - Serialized in OrcSplit
+ private final OrcProto.FileTail fileTail;
+ // serialized representation of metadata, footer and postscript
+ private final ByteBuffer serializedTail;
+ // used to invalidate cache entries
+ private final long fileModificationTime;
+ // lazily deserialized
+ private OrcProto.Metadata metadata;
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
+ this(fileTail, serializedTail, -1);
+ }
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
+ this.fileTail = fileTail;
+ this.serializedTail = serializedTail;
+ this.fileModificationTime = fileModificationTime;
+ this.metadata = null;
+ }
+
+ public ByteBuffer getSerializedTail() {
+ return serializedTail;
+ }
+
+ public long getFileModificationTime() {
+ return fileModificationTime;
+ }
+
+ public OrcProto.Footer getFooter() {
+ return fileTail.getFooter();
+ }
+
+ public OrcProto.PostScript getPostScript() {
+ return fileTail.getPostscript();
+ }
+
+ public OrcFile.WriterVersion getWriterVersion() {
+ OrcProto.PostScript ps = fileTail.getPostscript();
+ return (ps.hasWriterVersion()
+ ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+ }
+
+ public List<StripeInformation> getStripes() {
+ List<StripeInformation> result = new ArrayList<>(fileTail.getFooter().getStripesCount());
+ for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
+ result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
+ }
+ return result;
+ }
+
+ public CompressionKind getCompressionKind() {
+ return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
+ }
+
+ public CompressionCodec getCompressionCodec() {
+ return WriterImpl.createCodec(getCompressionKind());
+ }
+
+ public int getCompressionBufferSize() {
+ return (int) fileTail.getPostscript().getCompressionBlockSize();
+ }
+
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ List<StripeStatistics> result = new ArrayList<>();
+ List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
+ if (ssProto != null) {
+ for (OrcProto.StripeStatistics ss : ssProto) {
+ result.add(new StripeStatistics(ss.getColStatsList()));
+ }
+ }
+ return result;
+ }
+
+ public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
+ if (serializedTail == null) return null;
+ if (metadata == null) {
+ metadata = extractMetadata(serializedTail, 0,
+ (int) fileTail.getPostscript().getMetadataLength(),
+ getCompressionCodec(), getCompressionBufferSize());
+ // clear does not clear the contents but sets position to 0 and limit = capacity
+ serializedTail.clear();
+ }
+ return metadata.getStripeStatsList();
+ }
+
+ public int getMetadataSize() {
+ return (int) getPostScript().getMetadataLength();
+ }
+
+ public List<OrcProto.Type> getTypes() {
+ return getFooter().getTypesList();
+ }
+
+ public OrcProto.FileTail getFileTail() {
+ return fileTail;
+ }
+
+ public OrcProto.FileTail getMinimalFileTail() {
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
+ OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
+ footerBuilder.clearStatistics();
+ fileTailBuilder.setFooter(footerBuilder.build());
+ OrcProto.FileTail result = fileTailBuilder.build();
+ return result;
+ }
+}
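
OrcTail bundles the postscript and footer into one FileTail message, keeps the raw tail bytes for lazy decoding of the stripe statistics, and records the file modification time for cache invalidation. The point of the change is that the tail can be read once, cached or shipped in a split, and reused, so the task side does not repeat the footer read. A minimal sketch of the consumer-facing pieces, assuming the existing OrcFile.createReader/readerOptions entry points; the file path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.Reader;

public class FileTailPeek {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);  // an existing ORC file

    // One tail read happens here; the reader keeps the parsed tail around.
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));

    // The new accessor exposes footer + postscript as a single proto message,
    // which is what an OrcSplit or a footer cache can carry.
    OrcProto.FileTail tail = reader.getFileTail();
    System.out.println("rows        = " + tail.getFooter().getNumberOfRows());
    System.out.println("compression = " + tail.getPostscript().getCompression());
    System.out.println("file length = " + tail.getFileLength());
  }
}

A caller that already holds an OrcTail, for example one rebuilt from cached tail bytes via ReaderImpl.extractFileTail(ByteBuffer), can pass it through OrcFile.readerOptions(conf).orcTail(cachedTail); ReaderImpl then takes the tail as given and never touches the filesystem for the footer.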
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 7625d4a..a18f922 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -27,6 +27,9 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.FileMetadata;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
@@ -35,8 +38,6 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.CompressionCodec;
import org.apache.orc.FileFormatException;
-import org.apache.orc.FileMetaInfo;
-import org.apache.orc.FileMetadata;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
import org.slf4j.Logger;
@@ -62,27 +63,25 @@ public class ReaderImpl implements Reader {
private final long maxLength;
protected final Path path;
protected final org.apache.orc.CompressionKind compressionKind;
- protected final CompressionCodec codec;
- protected final int bufferSize;
- private final List<OrcProto.StripeStatistics> stripeStats;
+ protected CompressionCodec codec;
+ protected int bufferSize;
+ protected OrcProto.Metadata metadata;
+ private List<OrcProto.StripeStatistics> stripeStats;
private final int metadataSize;
protected final List<OrcProto.Type> types;
- private final TypeDescription schema;
+ private TypeDescription schema;
private final List<OrcProto.UserMetadataItem> userMetadata;
private final List<OrcProto.ColumnStatistics> fileStats;
private final List<StripeInformation> stripes;
protected final int rowIndexStride;
private final long contentLength, numberOfRows;
-
private long deserializedSize = -1;
protected final Configuration conf;
private final List<Integer> versionList;
private final OrcFile.WriterVersion writerVersion;
- // Same for metastore cache - maintains the same background buffer, but includes postscript.
- // This will only be set if the file footer/metadata was read from disk.
- private final ByteBuffer footerMetaAndPsBuffer;
+ protected OrcTail tail;
public static class StripeInformationImpl
implements StripeInformation {
@@ -206,6 +205,11 @@ public class ReaderImpl implements Reader {
}
@Override
+ public OrcProto.FileTail getFileTail() {
+ return tail.getFileTail();
+ }
+
+ @Override
public int getRowIndexStride() {
return rowIndexStride;
}
@@ -260,6 +264,32 @@ public class ReaderImpl implements Reader {
}
/**
+ * Ensure this is an ORC file to prevent users from trying to read text
+ * files or RC files as ORC files.
+ * @param psLen the postscript length
+ * @param buffer the tail of the file
+ * @throws IOException
+ */
+ protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
+ int magicLength = OrcFile.MAGIC.length();
+ int fullLength = magicLength + 1;
+ if (psLen < fullLength || buffer.remaining() < fullLength) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+
+ int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
+ byte[] array = buffer.array();
+ // now look for the magic string at the end of the postscript.
+ if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
+ // if it isn't there, this may be 0.11.0 version of the ORC file.
+ // Read the first 3 bytes from the buffer to check for the header
+ if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+ }
+ }
+
+ /**
* Build a version string out of an array.
* @param version the version number as a list
* @return the human readable form of the version string
@@ -315,7 +345,6 @@ public class ReaderImpl implements Reader {
this.path = path;
this.conf = options.getConfiguration();
this.maxLength = options.getMaxLength();
-
FileMetadata fileMetadata = options.getFileMetadata();
if (fileMetadata != null) {
this.compressionKind = fileMetadata.getCompressionKind();
@@ -333,38 +362,28 @@ public class ReaderImpl implements Reader {
this.fileStats = fileMetadata.getFileStats();
this.stripes = fileMetadata.getStripes();
this.userMetadata = null; // not cached and not needed here
- this.footerMetaAndPsBuffer = null;
} else {
- FileMetaInfo footerMetaData;
- if (options.getFileMetaInfo() != null) {
- footerMetaData = options.getFileMetaInfo();
- this.footerMetaAndPsBuffer = null;
+ OrcTail orcTail = options.getOrcTail();
+ if (orcTail == null) {
+ tail = extractFileTail(fs, path, options.getMaxLength());
+ options.orcTail(tail);
} else {
- footerMetaData = extractMetaInfoFromFooter(fs, path,
- options.getMaxLength());
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
+ tail = orcTail;
}
- options.fileMetaInfo(footerMetaData);
- MetaInfoObjExtractor rInfo =
- new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize,
- footerMetaData.metadataSize,
- footerMetaData.footerBuffer
- );
- this.compressionKind = rInfo.compressionKind;
- this.codec = rInfo.codec;
- this.bufferSize = rInfo.bufferSize;
- this.metadataSize = rInfo.metadataSize;
- this.stripeStats = rInfo.metadata.getStripeStatsList();
- this.types = rInfo.footer.getTypesList();
- this.rowIndexStride = rInfo.footer.getRowIndexStride();
- this.contentLength = rInfo.footer.getContentLength();
- this.numberOfRows = rInfo.footer.getNumberOfRows();
- this.userMetadata = rInfo.footer.getMetadataList();
- this.fileStats = rInfo.footer.getStatisticsList();
- this.versionList = footerMetaData.versionList;
- this.writerVersion = footerMetaData.writerVersion;
- this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
+ this.compressionKind = tail.getCompressionKind();
+ this.codec = tail.getCompressionCodec();
+ this.bufferSize = tail.getCompressionBufferSize();
+ this.metadataSize = tail.getMetadataSize();
+ this.versionList = tail.getPostScript().getVersionList();
+ this.types = tail.getFooter().getTypesList();
+ this.rowIndexStride = tail.getFooter().getRowIndexStride();
+ this.contentLength = tail.getFooter().getContentLength();
+ this.numberOfRows = tail.getFooter().getNumberOfRows();
+ this.userMetadata = tail.getFooter().getMetadataList();
+ this.fileStats = tail.getFooter().getStatisticsList();
+ this.writerVersion = tail.getWriterVersion();
+ this.stripes = tail.getStripes();
+ this.stripeStats = tail.getStripeStatisticsProto();
}
this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
}
@@ -397,7 +416,7 @@ public class ReaderImpl implements Reader {
singleton(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
- private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+ public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
@@ -430,22 +449,55 @@ public class ReaderImpl implements Reader {
return ps;
}
- private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
- Path path,
- long maxFileLength
- ) throws IOException {
+ public static OrcTail extractFileTail(ByteBuffer buffer)
+ throws IOException {
+ return extractFileTail(buffer, -1, -1);
+ }
+
+ public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime)
+ throws IOException {
+ int readSize = buffer.limit();
+ int psLen = buffer.get(readSize - 1) & 0xff;
+ int psOffset = readSize - 1 - psLen;
+ ensureOrcFooter(buffer, psLen);
+ byte[] psBuffer = new byte[psLen];
+ System.arraycopy(buffer.array(), psOffset, psBuffer, 0, psLen);
+ OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuffer);
+ int footerSize = (int) ps.getFooterLength();
+ CompressionCodec codec = WriterImpl
+ .createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ OrcProto.Footer footer = extractFooter(buffer,
+ (int) (buffer.position() + ps.getMetadataLength()),
+ footerSize, codec, (int) ps.getCompressionBlockSize());
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder()
+ .setPostscriptLength(psLen)
+ .setPostscript(ps)
+ .setFooter(footer)
+ .setFileLength(fileLength);
+ // clear does not clear the contents but sets position to 0 and limit = capacity
+ buffer.clear();
+ return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
+ }
+
+ protected OrcTail extractFileTail(FileSystem fs, Path path,
+ long maxFileLength) throws IOException {
FSDataInputStream file = fs.open(path);
- ByteBuffer buffer = null, fullFooterBuffer = null;
- OrcProto.PostScript ps = null;
- OrcFile.WriterVersion writerVersion = null;
+ ByteBuffer buffer;
+ OrcProto.PostScript ps;
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
+ long modificationTime;
try {
// figure out the size of the file using the option or filesystem
long size;
if (maxFileLength == Long.MAX_VALUE) {
- size = fs.getFileStatus(path).getLen();
+ FileStatus fileStatus = fs.getFileStatus(path);
+ size = fileStatus.getLen();
+ modificationTime = fileStatus.getModificationTime();
} else {
size = maxFileLength;
+ modificationTime = -1;
}
+ fileTailBuilder.setFileLength(size);
//read last bytes into buffer to get PostScript
int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
@@ -461,13 +513,16 @@ public class ReaderImpl implements Reader {
ensureOrcFooter(file, path, psLen, buffer);
int psOffset = readSize - 1 - psLen;
ps = extractPostScript(buffer, path, psLen, psOffset);
+ bufferSize = (int) ps.getCompressionBlockSize();
+ codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
int footerSize = (int) ps.getFooterLength();
int metadataSize = (int) ps.getMetadataLength();
- writerVersion = extractWriterVersion(ps);
//check if extra bytes need to be read
int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
+ int tailSize = 1 + psLen + footerSize + metadataSize;
if (extra > 0) {
//more bytes need to be read, seek back to the right place and read extra bytes
ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
@@ -478,17 +533,23 @@ public class ReaderImpl implements Reader {
extraBuf.put(buffer);
buffer = extraBuf;
buffer.position(0);
- fullFooterBuffer = buffer.slice();
- buffer.limit(footerSize + metadataSize);
+ buffer.limit(tailSize);
+ readSize += extra;
+ psOffset = readSize - 1 - psLen;
} else {
//footer is already in the bytes in buffer, just adjust position, length
buffer.position(psOffset - footerSize - metadataSize);
- fullFooterBuffer = buffer.slice();
- buffer.limit(psOffset);
+ buffer.limit(buffer.position() + tailSize);
}
- // remember position for later TODO: what later? this comment is useless
buffer.mark();
+ int footerOffset = psOffset - footerSize;
+ buffer.position(footerOffset);
+ ByteBuffer footerBuffer = buffer.slice();
+ buffer.reset();
+ OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
+ codec, bufferSize);
+ fileTailBuilder.setFooter(footer);
} finally {
try {
file.close();
@@ -497,68 +558,15 @@ public class ReaderImpl implements Reader {
}
}
- return new FileMetaInfo(
- ps.getCompression().toString(),
- (int) ps.getCompressionBlockSize(),
- (int) ps.getMetadataLength(),
- buffer,
- ps.getVersionList(),
- writerVersion,
- fullFooterBuffer
- );
- }
-
- protected static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
- return (ps.hasWriterVersion()
- ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
- }
-
- protected static List<StripeInformation> convertProtoStripesToStripes(
- List<OrcProto.StripeInformation> stripes) {
- List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
- for (OrcProto.StripeInformation info : stripes) {
- result.add(new StripeInformationImpl(info));
- }
- return result;
- }
-
- /**
- * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
- * from serialized fields.
- * As the fields are final, the fields need to be initialized in the constructor and
- * can't be done in some helper function. So this helper class is used instead.
- *
- */
- private static class MetaInfoObjExtractor{
- final org.apache.orc.CompressionKind compressionKind;
- final CompressionCodec codec;
- final int bufferSize;
- final int metadataSize;
- final OrcProto.Metadata metadata;
- final OrcProto.Footer footer;
-
- MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer) throws IOException {
-
- this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr.toUpperCase());
- this.bufferSize = bufferSize;
- this.codec = WriterImpl.createCodec(compressionKind);
- this.metadataSize = metadataSize;
-
- int position = footerBuffer.position();
- int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-
- this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
- this.footer = extractFooter(
- footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
-
- footerBuffer.position(position);
- }
+ ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
+ serializedTail.put(buffer.slice());
+ serializedTail.rewind();
+ return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
}
@Override
public ByteBuffer getSerializedFileFooter() {
- return footerMetaAndPsBuffer;
+ return tail.getSerializedTail();
}
@Override
@@ -727,7 +735,11 @@ public class ReaderImpl implements Reader {
}
@Override
- public List<StripeStatistics> getStripeStatistics() {
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ if (stripeStats == null && metadata == null) {
+ metadata = extractMetadata(tail.getSerializedTail(), 0, metadataSize, codec, bufferSize);
+ stripeStats = metadata.getStripeStatsList();
+ }
List<StripeStatistics> result = new ArrayList<>();
for (OrcProto.StripeStatistics ss : stripeStats) {
result.add(new StripeStatistics(ss.getColStatsList()));
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 6b7e597..dbc34ab 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -224,6 +224,7 @@ message PostScript {
}
// The contents of the file tail that must be serialized.
+// This gets serialized as part of OrcSplit, also used by footer cache.
message FileTail {
optional PostScript postscript = 1;
optional Footer footer = 2;
[3/7] orc git commit: Updating the versions of storage api for merging changes from Hive.
Posted by om...@apache.org.
Updating the versions of storage api for merging changes from Hive.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/cbedf88d
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/cbedf88d
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/cbedf88d
Branch: refs/heads/master
Commit: cbedf88d19951f6d93d0edcb333ec79231cd8dc9
Parents: d9d529b
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 11:02:06 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
java/CMakeLists.txt | 2 +-
java/pom.xml | 2 +-
java/storage-api/pom.xml | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
index b1be1dc..e8ca7ff 100644
--- a/java/CMakeLists.txt
+++ b/java/CMakeLists.txt
@@ -15,7 +15,7 @@ execute_process(COMMAND mvn versions:set -DnewVersion=${ORC_VERSION}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set(ORC_JARS
- ${CMAKE_CURRENT_BINARY_DIR}/storage-api/hive-storage-api-2.1.0-pre-orc.jar
+ ${CMAKE_CURRENT_BINARY_DIR}/storage-api/hive-storage-api-2.1.1-pre-orc.jar
${CMAKE_CURRENT_BINARY_DIR}/core/orc-core-${ORC_VERSION}.jar
${CMAKE_CURRENT_BINARY_DIR}/mapreduce/orc-mapreduce-${ORC_VERSION}.jar
)
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index f707cb6..01fdf04 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -69,7 +69,7 @@
<test.tmp.dir>${project.build.directory}/testing-tmp</test.tmp.dir>
<hadoop.version>2.6.0</hadoop.version>
- <storage-api.version>2.1.0-pre-orc</storage-api.version>
+ <storage-api.version>2.1.1-pre-orc</storage-api.version>
</properties>
<build>
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/java/storage-api/pom.xml b/java/storage-api/pom.xml
index 86463e6..fae7e9a 100644
--- a/java/storage-api/pom.xml
+++ b/java/storage-api/pom.xml
@@ -25,8 +25,8 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
<!-- remove our custom version of storage-api once we get the changes
- released as hive 2.1.0 -->
- <version>2.1.0-pre-orc</version>
+ released as hive 2.1.1 -->
+ <version>2.1.1-pre-orc</version>
<packaging>jar</packaging>
<name>Hive Storage API</name>
[2/7] orc git commit: HIVE-13948 Incorrect timezone handling in Writable.
Posted by om...@apache.org.
HIVE-13948 Incorrect timezone handling in Writable.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/68cdbd40
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/68cdbd40
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/68cdbd40
Branch: refs/heads/master
Commit: 68cdbd402e8bc81e176bb8175c1fb1173a4b3c23
Parents: cbedf88
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:19:30 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/serde2/io/DateWritable.java | 68 ++++++++++++++++----
1 file changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/68cdbd40/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
index dd2b1d9..637720a 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
import java.io.IOException;
import java.sql.Date;
import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -41,7 +42,7 @@ public class DateWritable implements WritableComparable<DateWritable> {
private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
- // Local time zone.
+ // Local time zone. Store separately because Calendar would clone it.
// Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new ThreadLocal<TimeZone>() {
@Override
@@ -50,6 +51,19 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
};
+ private static final ThreadLocal<Calendar> UTC_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return new GregorianCalendar(TimeZone.getTimeZone("UTC"));
+ }
+ };
+ private static final ThreadLocal<Calendar> LOCAL_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return Calendar.getInstance();
+ }
+ };
+
// Internal representation is an integer representing day offset from our epoch value 1970-01-01
private int daysSinceEpoch = 0;
@@ -95,11 +109,16 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
/**
- *
* @return Date value corresponding to the date in the local time zone
*/
public Date get() {
- return new Date(daysToMillis(daysSinceEpoch));
+ return get(true);
+ }
+
+ // TODO: we should call this more often. In theory, for DATE type, time should never matter, but
+ // it's hard to tell w/some code paths like UDFs/OIs etc. that are used in many places.
+ public Date get(boolean doesTimeMatter) {
+ return new Date(daysToMillis(daysSinceEpoch, doesTimeMatter));
}
public int getDays() {
@@ -119,21 +138,47 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
public static long daysToMillis(int d) {
- // Convert from day offset to ms in UTC, then apply local timezone offset.
- long millisUtc = d * MILLIS_PER_DAY;
- long tmp = millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
- // Between millisUtc and tmp, the time zone offset may have changed due to DST.
- // Look up the offset again.
- return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+ return daysToMillis(d, true);
+ }
+
+ public static long daysToMillis(int d, boolean doesTimeMatter) {
+ // What we are trying to get is the equivalent of new Date(ymd).getTime() in the local tz,
+ // where ymd is whatever d represents. How it "works" is this.
+ // First we get the UTC midnight for that day (which always exists, a small island of sanity).
+ long utcMidnight = d * MILLIS_PER_DAY;
+ // Now we take a local TZ offset at midnight UTC. Say we are in -4; that means (surprise
+ // surprise) that at midnight UTC it was 20:00 in local. So far we are on firm ground.
+ long utcMidnightOffset = LOCAL_TIMEZONE.get().getOffset(utcMidnight);
+ // And now we wander straight into the swamp, when instead of adding, we subtract it from UTC
+ // midnight to supposedly get local midnight (in the above case, 4:00 UTC). Of course, given
+ // all the insane DST variations, where we actually end up is anyone's guess.
+ long hopefullyMidnight = utcMidnight - utcMidnightOffset;
+ // Then we determine the local TZ offset at that magical time.
+ long offsetAtHM = LOCAL_TIMEZONE.get().getOffset(hopefullyMidnight);
+ // If the offsets are the same, we assume our initial jump did not cross any DST boundaries,
+ // and is thus valid. Both times flowed at the same pace. We congratulate ourselves and bail.
+ if (utcMidnightOffset == offsetAtHM) return hopefullyMidnight;
+ // Alas, we crossed some DST boundary. If the time of day doesn't matter to the caller, we'll
+ // simply get the next day and go back half a day. This is not ideal but seems to work.
+ if (!doesTimeMatter) return daysToMillis(d + 1) - (MILLIS_PER_DAY >> 1);
+ // Now, we could get previous and next day, figure our how many hours were inserted or removed,
+ // and from which of the days, etc. But at this point our gun is pointing straight at our foot,
+ // so let's just go the safe, expensive way.
+ Calendar utc = UTC_CALENDAR.get(), local = LOCAL_CALENDAR.get();
+ utc.setTimeInMillis(utcMidnight);
+ local.set(utc.get(Calendar.YEAR), utc.get(Calendar.MONTH), utc.get(Calendar.DAY_OF_MONTH));
+ return local.getTimeInMillis();
}
public static int millisToDays(long millisLocal) {
+ // We assume millisLocal is midnight of some date. What we are basically trying to do
+ // here is go from local-midnight to UTC-midnight (or whatever time that happens to be).
long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
int days;
if (millisUtc >= 0L) {
days = (int) (millisUtc / MILLIS_PER_DAY);
} else {
- days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+ days = (int) ((millisUtc - 86399999 /*(MILLIS_PER_DAY - 1)*/) / MILLIS_PER_DAY);
}
return days;
}
@@ -169,7 +214,8 @@ public class DateWritable implements WritableComparable<DateWritable> {
@Override
public String toString() {
- return get().toString();
+ // For toString, the time does not matter
+ return get(false).toString();
}
@Override
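
The rewritten daysToMillis spells out its reasoning in the comments above: convert the day offset to UTC midnight, subtract the local offset to guess local midnight, accept the guess if no DST boundary was crossed, and otherwise fall back to either a half-day adjustment (when the time of day does not matter) or an explicit Calendar conversion. A small round-trip example, assuming the storage-api DateWritable; the zone and date are only illustrative:

import java.sql.Date;
import java.util.TimeZone;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class DateRoundTrip {
  public static void main(String[] args) {
    // Pin the JVM default zone so the output is deterministic; DateWritable
    // derives all of its offsets from the default time zone.
    TimeZone.setDefault(TimeZone.getTimeZone("America/New_York"));

    // 2016-03-13 is a US spring-forward day. The offsets at UTC midnight and
    // at the computed local midnight still agree, so daysToMillis takes its
    // fast path and the round trip is exact.
    int days = DateWritable.millisToDays(Date.valueOf("2016-03-13").getTime());
    long millis = DateWritable.daysToMillis(days);

    System.out.println(new Date(millis));        // 2016-03-13
    System.out.println(new DateWritable(days));  // toString() now ignores the time of day
  }
}

Zones that shift the clock exactly at local midnight, so that midnight does not exist on the transition day, are the ones that reach the new Calendar-based fallback at the end of daysToMillis.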
[4/7] orc git commit: HIVE-14000 Changing a numeric type column causes values other than NULL.
Posted by om...@apache.org.
HIVE-14000 Changing a numeric type column causes values other than NULL.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/17e14f62
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/17e14f62
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/17e14f62
Branch: refs/heads/master
Commit: 17e14f62d2315c5149e4e491ccf922913b13cc7e
Parents: 68cdbd4
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:21:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 305 ++++++++++---------
.../hadoop/hive/ql/util/TimestampUtils.java | 70 +++--
2 files changed, 206 insertions(+), 169 deletions(-)
----------------------------------------------------------------------
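
The common thread in the ConvertTreeReaderFactory changes below is that a value which cannot be represented in the narrower reader type now becomes NULL instead of a silently wrapped number: doubles and decimals are range-checked before the cast to long, and downCastAnyInteger nulls the entry when the cast to byte/short/int loses data. A minimal sketch of the range check, which mirrors the bounds borrowed from Guava's DoubleMath.roundToLong; the sample values are only illustrative:

public class DownCastCheck {
  // Long.MIN_VALUE is exactly representable as a double; Long.MAX_VALUE is not,
  // so the patch stores Long.MAX_VALUE + 1 (== 2^63) and offsets the comparison.
  private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
  private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;

  static boolean doubleCanFitInLong(double d) {
    return (MIN_LONG_AS_DOUBLE - d < 1.0) && (d < MAX_LONG_AS_DOUBLE_PLUS_ONE);
  }

  public static void main(String[] args) {
    System.out.println(doubleCanFitInLong(123.0));  // true  -> down-cast proceeds
    System.out.println(doubleCanFitInLong(1e19));   // false -> entry is set to NULL

    // After the long-range check, downCastAnyInteger also nulls the entry when
    // the value does not survive the cast to the narrower integer type.
    long input = 300;
    byte narrowed = (byte) input;                   // 44 -> data loss
    System.out.println(narrowed != input);          // true -> would become NULL
  }
}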
http://git-wip-us.apache.org/repos/asf/orc/blob/17e14f62/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 3ba56f7..753e5bc 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -35,8 +35,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.util.TimestampUtils;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
import org.apache.orc.OrcProto;
import org.apache.orc.TypeDescription;
import org.apache.orc.TypeDescription.Category;
@@ -263,6 +261,22 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return string;
}
+ private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
+ /*
+ * We cannot store Long.MAX_VALUE as a double without losing precision. Instead, we store
+ * Long.MAX_VALUE + 1 == -Long.MIN_VALUE, and then offset all comparisons by 1.
+ */
+ private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;
+
+ public boolean doubleCanFitInLong(double doubleValue) {
+
+ // Borrowed from Guava DoubleMath.roundToLong except do not want dependency on Guava and we
+ // don't want to catch an exception.
+
+ return ((MIN_LONG_AS_DOUBLE - doubleValue < 1.0) &&
+ (doubleValue < MAX_LONG_AS_DOUBLE_PLUS_ONE));
+ }
+
@Override
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
// Pass-thru.
@@ -336,20 +350,44 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
- public long downCastAnyInteger(long input, TypeDescription readerType) {
- switch (readerType.getCategory()) {
+ public void downCastAnyInteger(LongColumnVector longColVector, int elementNum,
+ TypeDescription readerType) {
+ downCastAnyInteger(longColVector, elementNum, longColVector.vector[elementNum], readerType);
+ }
+
+ public void downCastAnyInteger(LongColumnVector longColVector, int elementNum, long inputLong,
+ TypeDescription readerType) {
+ long[] vector = longColVector.vector;
+ long outputLong;
+ Category readerCategory = readerType.getCategory();
+ switch (readerCategory) {
case BOOLEAN:
- return input == 0 ? 0 : 1;
+ // No data loss for boolean.
+ vector[elementNum] = inputLong == 0 ? 0 : 1;
+ return;
case BYTE:
- return (byte) input;
+ outputLong = (byte) inputLong;
+ break;
case SHORT:
- return (short) input;
+ outputLong = (short) inputLong;
+ break;
case INT:
- return (int) input;
+ outputLong = (int) inputLong;
+ break;
case LONG:
- return input;
+ // No data loss for long.
+ vector[elementNum] = inputLong;
+ return;
default:
- throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
+ throw new RuntimeException("Unexpected type kind " + readerCategory.name());
+ }
+
+ if (outputLong != inputLong) {
+ // Data loss.
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ vector[elementNum] = outputLong;
}
}
@@ -439,25 +477,22 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
anyIntegerAsLongTreeReader.nextVector(previousVector, isNull, batchSize);
LongColumnVector resultColVector = (LongColumnVector) previousVector;
if (downCastNeeded) {
- long[] resultVector = resultColVector.vector;
if (resultColVector.isRepeating) {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
- resultVector[0] = downCastAnyInteger(resultVector[0], readerType);
+ downCastAnyInteger(resultColVector, 0, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Result remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
- resultVector[i] = downCastAnyInteger(resultVector[i], readerType);
+ downCastAnyInteger(resultColVector, i, readerType);
}
} else {
for (int i = 0; i < batchSize; i++) {
if (!resultColVector.isNull[i]) {
- resultVector[i] = downCastAnyInteger(resultVector[i], readerType);
+ downCastAnyInteger(resultColVector, i, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Result remains null.
}
}
}
@@ -470,7 +505,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
private final TypeDescription readerType;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private LongColumnVector longColVector;
@@ -480,15 +514,19 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- float floatValue = (float) doubleColVector.vector[elementNum];
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- (long) floatValue, readerType);
+ double doubleValue = doubleColVector.vector[elementNum];
+ if (!doubleCanFitInLong(doubleValue)) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ // UNDONE: Does the overflow check above using double really work here for float?
+ float floatValue = (float) doubleValue;
+ downCastAnyInteger(longColVector, elementNum, (long) floatValue, readerType);
+ }
}
@Override
@@ -525,9 +563,13 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- (long) doubleColVector.vector[elementNum], readerType);
+ double doubleValue = doubleColVector.vector[elementNum];
+ if (!doubleCanFitInLong(doubleValue)) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ downCastAnyInteger(longColVector, elementNum, (long) doubleValue, readerType);
+ }
}
@Override
@@ -553,7 +595,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private LongColumnVector longColVector;
@@ -565,15 +606,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
+ private static HiveDecimal DECIMAL_MAX_LONG = HiveDecimal.create(Long.MAX_VALUE);
+ private static HiveDecimal DECIMAL_MIN_LONG = HiveDecimal.create(Long.MIN_VALUE);
+
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- decimalColVector.vector[elementNum].getHiveDecimal().longValue(),
- readerType);
+ HiveDecimal decimalValue = decimalColVector.vector[elementNum].getHiveDecimal();
+ if (decimalValue.compareTo(DECIMAL_MAX_LONG) > 0 ||
+ decimalValue.compareTo(DECIMAL_MIN_LONG) < 0) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ downCastAnyInteger(longColVector, elementNum, decimalValue.longValue(), readerType);
+ }
}
@Override
@@ -596,7 +643,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
private BytesColumnVector bytesColVector;
private LongColumnVector longColVector;
@@ -604,7 +650,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
AnyIntegerFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
@@ -615,8 +660,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
long longValue = parseLongFromString(string);
if (!getIsParseError()) {
- longColVector.vector[elementNum] =
- downCastAnyInteger(longValue, readerType);
+ downCastAnyInteger(longColVector, elementNum, longValue, readerType);
} else {
longColVector.noNulls = false;
longColVector.isNull[elementNum] = true;
@@ -660,8 +704,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Use TimestampWritable's getSeconds.
long longValue = TimestampUtils.millisToSeconds(
timestampColVector.asScratchTimestamp(elementNum).getTime());
- longColVector.vector[elementNum] =
- downCastAnyInteger(longValue, readerType);
+ downCastAnyInteger(longColVector, elementNum, longValue, readerType);
}
@Override
@@ -745,8 +788,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
resultVector[0] = (float) resultVector[0];
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
@@ -757,8 +799,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (!resultColVector.isNull[i]) {
resultVector[i] = (float) resultVector[i];
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Remains null.
}
}
}
@@ -771,8 +812,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private DoubleColumnVector doubleColVector;
@@ -781,10 +820,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
this.precision = fileType.getPrecision();
this.scale = fileType.getScale();
- this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
@@ -813,14 +850,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DoubleColumnVector doubleColVector;
FloatFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -858,14 +893,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private DoubleColumnVector doubleColVector;
- FloatFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ FloatFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -940,13 +972,10 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private FloatWritable floatResult;
-
DoubleFromFloatTreeReader(int columnId) throws IOException {
super(columnId);
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
@@ -964,20 +993,15 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private DoubleColumnVector doubleColVector;
- DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType,
- TypeDescription readerType) throws IOException {
+ DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType) throws IOException {
super(columnId);
this.precision = fileType.getPrecision();
this.scale = fileType.getScale();
- this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
@@ -1006,14 +1030,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DoubleColumnVector doubleColVector;
DoubleFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1050,14 +1072,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private DoubleColumnVector doubleColVector;
- DoubleFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DoubleFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -1088,16 +1107,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
- private int precision;
- private int scale;
private LongColumnVector longColVector;
private DecimalColumnVector decimalColVector;
- DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
- TypeDescription readerType, boolean skipCorrupt) throws IOException {
+ DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType, boolean skipCorrupt)
+ throws IOException {
super(columnId);
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
anyIntegerAsLongTreeReader =
new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
setConvertTreeReader(anyIntegerAsLongTreeReader);
@@ -1106,8 +1121,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) {
long longValue = longColVector.vector[elementNum];
- HiveDecimalWritable hiveDecimalWritable =
- new HiveDecimalWritable(longValue);
+ HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(longValue);
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, hiveDecimalWritable);
}
@@ -1131,30 +1146,25 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private int precision;
- private int scale;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private DecimalColumnVector decimalColVector;
DecimalFromFloatTreeReader(int columnId, TypeDescription readerType)
throws IOException {
super(columnId);
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
float floatValue = (float) doubleColVector.vector[elementNum];
if (!Float.isNaN(floatValue)) {
- HiveDecimal value =
+ HiveDecimal decimalValue =
HiveDecimal.create(Float.toString(floatValue));
- if (value != null) {
- decimalColVector.set(elementNum, value);
+ if (decimalValue != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+ decimalColVector.set(elementNum, decimalValue);
} else {
decimalColVector.noNulls = false;
decimalColVector.isNull[elementNum] = true;
@@ -1227,14 +1237,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DecimalColumnVector decimalColVector;
DecimalFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1244,6 +1252,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
HiveDecimal value = parseDecimalFromString(string);
if (value != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, value);
} else {
decimalColVector.noNulls = false;
@@ -1271,18 +1280,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
- private int precision;
- private int scale;
private DecimalColumnVector decimalColVector;
- DecimalFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DecimalFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -1293,6 +1295,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
timestampColVector.asScratchTimestamp(elementNum));
HiveDecimal value = HiveDecimal.create(Double.toString(doubleValue));
if (value != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, value);
} else {
decimalColVector.noNulls = false;
@@ -1316,11 +1319,61 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
+ public static class DecimalFromDecimalTreeReader extends ConvertTreeReader {
+
+ private DecimalTreeReader decimalTreeReader;
+
+ private DecimalColumnVector fileDecimalColVector;
+ private int filePrecision;
+ private int fileScale;
+ private int readerPrecision;
+ private int readerScale;
+ private DecimalColumnVector decimalColVector;
+
+ DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
+ throws IOException {
+ super(columnId);
+ filePrecision = fileType.getPrecision();
+ fileScale = fileType.getScale();
+ readerPrecision = readerType.getPrecision();
+ readerScale = readerType.getScale();
+ decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
+ setConvertTreeReader(decimalTreeReader);
+ }
+
+ @Override
+ public void setConvertVectorElement(int elementNum) throws IOException {
+
+ HiveDecimalWritable valueWritable = HiveDecimalWritable.enforcePrecisionScale(
+ fileDecimalColVector.vector[elementNum], readerPrecision, readerScale);
+ if (valueWritable != null) {
+ decimalColVector.set(elementNum, valueWritable);
+ } else {
+ decimalColVector.noNulls = false;
+ decimalColVector.isNull[elementNum] = true;
+ }
+ }
+
+ @Override
+ public void nextVector(ColumnVector previousVector,
+ boolean[] isNull,
+ final int batchSize) throws IOException {
+ if (fileDecimalColVector == null) {
+ // Allocate column vector for file; cast column vector for reader.
+ fileDecimalColVector = new DecimalColumnVector(filePrecision, fileScale);
+ decimalColVector = (DecimalColumnVector) previousVector;
+ }
+ // Read present/isNull stream
+ decimalTreeReader.nextVector(fileDecimalColVector, isNull, batchSize);
+
+ convertVector(fileDecimalColVector, decimalColVector, batchSize);
+ }
+ }
+
public static class StringGroupFromAnyIntegerTreeReader extends ConvertTreeReader {
private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
private LongColumnVector longColVector;
private BytesColumnVector bytesColVector;
@@ -1328,7 +1381,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
StringGroupFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType, boolean skipCorrupt) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
anyIntegerAsLongTreeReader =
new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
@@ -1364,7 +1416,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
private final TypeDescription readerType;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private BytesColumnVector bytesColVector;
@@ -1375,7 +1426,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
@@ -1544,7 +1594,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final TypeDescription readerType;
private LongColumnVector longColVector;
private BytesColumnVector bytesColVector;
- private DateWritable dateWritableResult;
private Date date;
StringGroupFromDateTreeReader(int columnId, TypeDescription readerType,
@@ -1553,7 +1602,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
dateTreeReader = new DateTreeReader(columnId);
setConvertTreeReader(dateTreeReader);
- dateWritableResult = new DateWritable();
date = new Date(0);
}
@@ -1585,13 +1633,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
StringGroupFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
@@ -1609,8 +1655,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
convertStringGroupVectorElement(resultColVector, 0, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
@@ -1621,8 +1666,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (!resultColVector.isNull[i]) {
convertStringGroupVectorElement(resultColVector, i, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Remains null.
}
}
}
@@ -1634,7 +1678,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private BinaryTreeReader binaryTreeReader;
private final TypeDescription readerType;
- private BytesWritable binaryWritableResult;
private BytesColumnVector inBytesColVector;
private BytesColumnVector outBytesColVector;
@@ -1644,7 +1687,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
binaryTreeReader = new BinaryTreeReader(columnId);
setConvertTreeReader(binaryTreeReader);
- binaryWritableResult = new BytesWritable();
}
@Override
@@ -1725,7 +1767,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private TimestampColumnVector timestampColVector;
@@ -1734,14 +1775,14 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) {
float floatValue = (float) doubleColVector.vector[elementNum];
- timestampColVector.set(elementNum,
- TimestampUtils.doubleToTimestamp(floatValue));
+ Timestamp timestampValue = TimestampUtils.doubleToTimestamp(floatValue);
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+ timestampColVector.set(elementNum, timestampValue);
}
@Override
@@ -1777,8 +1818,9 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) {
double doubleValue = doubleColVector.vector[elementNum];
- timestampColVector.set(elementNum,
- TimestampUtils.doubleToTimestamp(doubleValue));
+ Timestamp timestampValue = TimestampUtils.doubleToTimestamp(doubleValue);
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+ timestampColVector.set(elementNum, timestampValue);
}
@Override
@@ -1803,7 +1845,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private TimestampColumnVector timestampColVector;
@@ -1814,14 +1855,14 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.scale = fileType.getScale();
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
public void setConvertVectorElement(int elementNum) {
Timestamp timestampValue =
- TimestampUtils.decimalToTimestamp(
- decimalColVector.vector[elementNum].getHiveDecimal());
+ TimestampUtils.decimalToTimestamp(
+ decimalColVector.vector[elementNum].getHiveDecimal());
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
timestampColVector.set(elementNum, timestampValue);
}
@@ -1845,14 +1886,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private TimestampColumnVector timestampColVector;
TimestampFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1890,7 +1929,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private DateTreeReader dateTreeReader;
- private DateWritable doubleResult;
private LongColumnVector longColVector;
private TimestampColumnVector timestampColVector;
@@ -1899,7 +1937,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
dateTreeReader = new DateTreeReader(columnId);
setConvertTreeReader(dateTreeReader);
- doubleResult = new DateWritable();
}
@Override
@@ -1929,14 +1966,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private LongColumnVector longColVector;
DateFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1974,14 +2009,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private LongColumnVector longColVector;
- DateFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DateFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -2014,12 +2046,9 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
-
BinaryFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -2064,7 +2093,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
skipCorrupt);
case DECIMAL:
- return new DecimalFromAnyIntegerTreeReader(columnId, fileType, readerType, skipCorrupt);
+ return new DecimalFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
case STRING:
case CHAR:
@@ -2208,7 +2237,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new FloatFromDecimalTreeReader(columnId, fileType, readerType);
case DOUBLE:
- return new DoubleFromDecimalTreeReader(columnId, fileType, readerType);
+ return new DoubleFromDecimalTreeReader(columnId, fileType);
case STRING:
case CHAR:
@@ -2424,13 +2453,13 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new AnyIntegerFromTimestampTreeReader(columnId, readerType, skipCorrupt);
case FLOAT:
- return new FloatFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new FloatFromTimestampTreeReader(columnId, skipCorrupt);
case DOUBLE:
- return new DoubleFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DoubleFromTimestampTreeReader(columnId, skipCorrupt);
case DECIMAL:
- return new DecimalFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DecimalFromTimestampTreeReader(columnId, skipCorrupt);
case STRING:
case CHAR:
@@ -2442,7 +2471,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
readerType.getCategory() + " to self needed");
case DATE:
- return new DateFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DateFromTimestampTreeReader(columnId, skipCorrupt);
// Not currently supported conversion(s):
case BINARY:
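The comments added in several setConvertVectorElement() bodies above rely on DecimalColumnVector.set() nulling out any value that does not fit the vector's declared precision and scale. A minimal, self-contained sketch of that behavior (illustrative only; the driver class and values below are made up and not part of this commit):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

public class DecimalSetNullSketch {
  public static void main(String[] args) {
    // A vector with precision 5 and scale 2 can hold at most 999.99.
    DecimalColumnVector col = new DecimalColumnVector(1024, 5, 2);

    // Fits the declared precision/scale: stored as-is.
    col.set(0, HiveDecimal.create("123.45"));

    // Does not fit decimal(5,2): instead of throwing, the vector marks the
    // entry null and clears noNulls, which is what the converters count on.
    col.set(1, HiveDecimal.create("123456.78"));

    System.out.println(col.isNull[1]);   // expected: true
    System.out.println(col.noNulls);     // expected: false
  }
}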
http://git-wip-us.apache.org/repos/asf/orc/blob/17e14f62/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
index 189ead5..41db9ca 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
@@ -39,45 +39,53 @@ public class TimestampUtils {
}
public static Timestamp doubleToTimestamp(double f) {
- long seconds = (long) f;
-
- // We must ensure the exactness of the double's fractional portion.
- // 0.6 as the fraction part will be converted to 0.59999... and
- // significantly reduce the savings from binary serialization
- BigDecimal bd;
try {
- bd = new BigDecimal(String.valueOf(f));
+ long seconds = (long) f;
+
+ // We must ensure the exactness of the double's fractional portion.
+ // 0.6 as the fraction part will be converted to 0.59999... and
+ // significantly reduce the savings from binary serialization
+ BigDecimal bd = new BigDecimal(String.valueOf(f));
+
+ bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
+ int nanos = bd.intValue();
+
+ // Convert to millis
+ long millis = seconds * 1000;
+ if (nanos < 0) {
+ millis -= 1000;
+ nanos += 1000000000;
+ }
+ Timestamp t = new Timestamp(millis);
+
+ // Set remaining fractional portion to nanos
+ t.setNanos(nanos);
+ return t;
} catch (NumberFormatException nfe) {
return null;
+ } catch (IllegalArgumentException iae) {
+ return null;
}
- bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
- int nanos = bd.intValue();
-
- // Convert to millis
- long millis = seconds * 1000;
- if (nanos < 0) {
- millis -= 1000;
- nanos += 1000000000;
- }
- Timestamp t = new Timestamp(millis);
-
- // Set remaining fractional portion to nanos
- t.setNanos(nanos);
- return t;
}
public static Timestamp decimalToTimestamp(HiveDecimal d) {
- BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
- int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
- if (nanos < 0) {
- nanos += 1000000000;
- }
- long seconds =
- nanoInstant.subtract(new BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
- Timestamp t = new Timestamp(seconds * 1000);
- t.setNanos(nanos);
+ try {
+ BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
+ int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
+ if (nanos < 0) {
+ nanos += 1000000000;
+ }
+ long seconds =
+ nanoInstant.subtract(new BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
+ Timestamp t = new Timestamp(seconds * 1000);
+ t.setNanos(nanos);
- return t;
+ return t;
+ } catch (NumberFormatException nfe) {
+ return null;
+ } catch (IllegalArgumentException iae) {
+ return null;
+ }
}
/**
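With the whole conversion wrapped in try/catch, inputs that cannot be represented come back as null instead of surfacing a NumberFormatException or IllegalArgumentException to the tree readers, which then null the vector entry. A small sketch of calling the patched method (expected outputs are noted in comments; the driver class is illustrative, not part of the commit):

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.util.TimestampUtils;

public class DoubleToTimestampSketch {
  public static void main(String[] args) {
    // 1.5 seconds since epoch: one whole second plus a 0.5 s fraction,
    // i.e. getTime() == 1500 ms and getNanos() == 500000000.
    Timestamp ok = TimestampUtils.doubleToTimestamp(1.5d);
    System.out.println(ok.getTime() + " / " + ok.getNanos());

    // NaN (and infinity) cannot be parsed into a BigDecimal, and values that
    // previously made Timestamp.setNanos() throw are now caught as well;
    // both cases return null so the caller can null out the column entry.
    Timestamp bad = TimestampUtils.doubleToTimestamp(Double.NaN);
    System.out.println(bad);   // expected: null
  }
}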
[5/7] orc git commit: HIVE-13648. ORC schema evolution doesn't
support the same type conversions for varchar, char,
or decimal when max length, precision, or scale are different.
Posted by om...@apache.org.
HIVE-13648. ORC schema evolution doesn't support the same type conversions for
varchar, char, or decimal when max length, precision, or scale are different.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/047265cd
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/047265cd
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/047265cd
Branch: refs/heads/master
Commit: 047265cd167cbb1d17b596a77ba5b3b8f868cdb2
Parents: e8c0eb5
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:26:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 28 +++++++++++---------
.../org/apache/orc/impl/SchemaEvolution.java | 6 ++---
.../org/apache/orc/impl/TreeReaderFactory.java | 2 +-
3 files changed, 18 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
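The effect is that a reader schema may now change CHAR/VARCHAR maxLength or DECIMAL precision/scale and the values are converted, with anything that does not fit coming back as null, instead of the maxLength mismatch being rejected and the precision/scale difference being silently ignored. A hedged sketch of exercising this through the public reader API (the path and both schemas below are illustrative, not part of this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class EvolvedDecimalRead {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assume the file was written with struct<d:decimal(10,2),name:varchar(10)>.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));

    // The reader schema widens both the decimal and the varchar.
    TypeDescription readerSchema =
        TypeDescription.fromString("struct<d:decimal(12,4),name:varchar(20)>");

    RecordReader rows = reader.rows(new Reader.Options().schema(readerSchema));
    VectorizedRowBatch batch = readerSchema.createRowBatch();
    while (rows.nextBatch(batch)) {
      // batch.cols[0] is a DecimalColumnVector with the reader's (12,4);
      // DecimalFromDecimalTreeReader nulls any value that cannot be
      // represented at that precision and scale.
    }
    rows.close();
  }
}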
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 753e5bc..eda47d3 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -257,7 +257,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
bytesColVector.vector[elementNum],
bytesColVector.start[elementNum], bytesColVector.length[elementNum],
StandardCharsets.UTF_8);
-
+
return string;
}
@@ -1323,6 +1323,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private DecimalTreeReader decimalTreeReader;
+ private final TypeDescription fileType;
+ private final TypeDescription readerType;
private DecimalColumnVector fileDecimalColVector;
private int filePrecision;
private int fileScale;
@@ -1333,8 +1335,10 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
throws IOException {
super(columnId);
+ this.fileType = fileType;
filePrecision = fileType.getPrecision();
fileScale = fileType.getScale();
+ this.readerType = readerType;
readerPrecision = readerType.getPrecision();
readerScale = readerType.getScale();
decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
@@ -2248,7 +2252,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new TimestampFromDecimalTreeReader(columnId, fileType, skipCorrupt);
case DECIMAL:
- // UNDONE: Decimal to Decimal conversion????
+ return new DecimalFromDecimalTreeReader(columnId, fileType, readerType);
// Not currently supported conversion(s):
case BINARY:
@@ -2354,8 +2358,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case CHAR:
- throw new IllegalArgumentException("No conversion of type " +
- readerType.getCategory() + " to self needed");
+ return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case BINARY:
return new BinaryFromStringGroupTreeReader(columnId, fileType);
@@ -2411,8 +2414,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case VARCHAR:
- throw new IllegalArgumentException("No conversion of type " +
- readerType.getCategory() + " to self needed");
+ return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case BINARY:
return new BinaryFromStringGroupTreeReader(columnId, fileType);
@@ -2628,11 +2630,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* StringGroupFromFloatTreeReader (written)
* StringGroupFromDoubleTreeReader (written)
* StringGroupFromDecimalTreeReader (written)
- *
+ *
* String from Char/Varchar conversion
* Char from String/Varchar conversion
* Varchar from String/Char conversion
- *
+ *
* StringGroupFromTimestampTreeReader (written)
* StringGroupFromDateTreeReader (written)
* StringGroupFromBinaryTreeReader *****
@@ -2650,7 +2652,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* TimestampFromDecimalTreeReader (written)
* TimestampFromStringGroupTreeReader (written)
* TimestampFromDateTreeReader
- *
+ *
*
* To DATE:
* Convert from (STRING, CHAR, VARCHAR) using string conversion.
@@ -2780,7 +2782,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Fall through.
}
- // Now look for the few cases we don't convert from
+ // Now look for the few cases we don't convert from
switch (fileType.getCategory()) {
case BOOLEAN:
@@ -2799,8 +2801,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
default:
return true;
}
-
-
+
+
case STRING:
case CHAR:
case VARCHAR:
@@ -2836,7 +2838,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
default:
return true;
}
-
+
case BINARY:
switch (readerType.getCategory()) {
// Not currently supported conversion(s):
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 68000d6..07b527d 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -100,12 +100,10 @@ public class SchemaEvolution {
break;
case CHAR:
case VARCHAR:
- // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL)
- isOk = fileType.getMaxLength() == readerType.getMaxLength();
+ // We convert when the CHAR/VARCHAR type is the same but the maxLength differs.
break;
case DECIMAL:
- // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL)
- // TODO we don't enforce scale and precision checks, but probably should
+ // We convert when the DECIMAL type is the same but the precision/scale differ.
break;
case UNION:
case MAP:
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 6c8ecfd..5901c8c 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -2034,7 +2034,7 @@ public class TreeReaderFactory {
return new NullTreeReader(0);
}
TypeDescription.Category readerTypeCategory = readerType.getCategory();
- if (!fileType.getCategory().equals(readerTypeCategory) &&
+ if (!fileType.equals(readerType) &&
(readerTypeCategory != TypeDescription.Category.STRUCT &&
readerTypeCategory != TypeDescription.Category.MAP &&
readerTypeCategory != TypeDescription.Category.LIST &&
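The switch from comparing categories to full TypeDescription equality is what routes same-category but differently parameterized types (decimal precision/scale, char/varchar maxLength) through ConvertTreeReaderFactory. A small illustrative sketch of the distinction (the driver class is made up, not part of the commit):

import org.apache.orc.TypeDescription;

public class SchemaEqualsSketch {
  public static void main(String[] args) {
    TypeDescription fileType = TypeDescription.fromString("decimal(10,2)");
    TypeDescription readerType = TypeDescription.fromString("decimal(12,4)");

    // Same category, so the old category-only check saw nothing to convert.
    System.out.println(fileType.getCategory() == readerType.getCategory()); // true

    // equals() also compares precision and scale (and maxLength for
    // char/varchar), so the new !fileType.equals(readerType) check creates
    // a convert tree reader for this pair.
    System.out.println(fileType.equals(readerType)); // false
  }
}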