You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2017/02/03 16:38:19 UTC
[20/22] hive git commit: HIVE-14007. Replace hive-orc module with ORC
1.3.1
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BinaryColumnStatistics.java b/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
deleted file mode 100644
index 19db98a..0000000
--- a/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
- long getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BloomFilterIO.java b/orc/src/java/org/apache/orc/BloomFilterIO.java
deleted file mode 100644
index 1406266..0000000
--- a/orc/src/java/org/apache/orc/BloomFilterIO.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hive.common.util.BloomFilter;
-
-import com.google.common.primitives.Longs;
-
-public class BloomFilterIO extends BloomFilter {
-
- public BloomFilterIO(long expectedEntries) {
- super(expectedEntries, DEFAULT_FPP);
- }
-
- public BloomFilterIO(long expectedEntries, double fpp) {
- super(expectedEntries, fpp);
- }
-
-/**
- * Initializes the BloomFilter from the given Orc BloomFilter
- */
- public BloomFilterIO(OrcProto.BloomFilter bloomFilter) {
- this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
- this.numHashFunctions = bloomFilter.getNumHashFunctions();
- this.numBits = (int) this.bitSet.bitSize();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BooleanColumnStatistics.java b/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
deleted file mode 100644
index af08f06..0000000
--- a/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
- long getFalseCount();
-
- long getTrueCount();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/ColumnStatistics.java b/orc/src/java/org/apache/orc/ColumnStatistics.java
deleted file mode 100644
index 72d8fbf..0000000
--- a/orc/src/java/org/apache/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
- /**
- * Get the number of values in this column. It will differ from the number
- * of rows because of NULL values and repeated values.
- * @return the number of values
- */
- long getNumberOfValues();
-
- /**
- * Returns true if there are nulls in the scope of column statistics.
- * @return true if null present else false
- */
- boolean hasNull();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/CompressionCodec.java b/orc/src/java/org/apache/orc/CompressionCodec.java
deleted file mode 100644
index 3421969..0000000
--- a/orc/src/java/org/apache/orc/CompressionCodec.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-import javax.annotation.Nullable;
-
-public interface CompressionCodec {
-
- enum Modifier {
- /* speed/compression tradeoffs */
- FASTEST,
- FAST,
- DEFAULT,
- /* data sensitivity modifiers */
- TEXT,
- BINARY
- };
-
- /**
- * Compress the in buffer to the out buffer.
- * @param in the bytes to compress
- * @param out the compressed bytes
- * @param overflow put any additional bytes here
- * @return true if the output is smaller than input
- * @throws IOException
- */
- boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
- ) throws IOException;
-
- /**
- * Decompress the in buffer to the out buffer.
- * @param in the bytes to decompress
- * @param out the decompressed bytes
- * @throws IOException
- */
- void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
-
- /**
- * Produce a modified compression codec if the underlying algorithm allows
- * modification.
- *
- * This does not modify the current object, but returns a new object if
- * modifications are possible. Returns the same object if no modifications
- * are possible.
- * @param modifiers compression modifiers
- * @return codec for use after optional modification
- */
- CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/CompressionKind.java b/orc/src/java/org/apache/orc/CompressionKind.java
deleted file mode 100644
index f684bef..0000000
--- a/orc/src/java/org/apache/orc/CompressionKind.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-/**
- * An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
- */
-public enum CompressionKind {
- NONE, ZLIB, SNAPPY, LZO
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DataReader.java b/orc/src/java/org/apache/orc/DataReader.java
deleted file mode 100644
index a5dbb76..0000000
--- a/orc/src/java/org/apache/orc/DataReader.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.orc.impl.OrcIndex;
-
-/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
-public interface DataReader extends AutoCloseable {
-
- /** Opens the DataReader, making it ready to use. */
- void open() throws IOException;
-
- OrcIndex readRowIndex(StripeInformation stripe,
- OrcProto.StripeFooter footer,
- boolean[] included, OrcProto.RowIndex[] indexes,
- boolean[] sargColumns,
- OrcProto.BloomFilterIndex[] bloomFilterIndices
- ) throws IOException;
-
- OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
-
- /** Reads the data.
- *
- * Note that for the cases such as zero-copy read, caller must release the disk ranges
- * produced after being done with them. Call isTrackingDiskRanges to find out if this is needed.
- * @param range List of disk ranges to read. Ranges with data will be ignored.
- * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
- * @param doForceDirect Whether the data should be read into direct buffers.
- * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
- */
- DiskRangeList readFileData(
- DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
-
-
- /**
- * Whether the user should release buffers created by readFileData. See readFileData javadoc.
- */
- boolean isTrackingDiskRanges();
-
- /**
- * Releases buffers created by readFileData. See readFileData javadoc.
- * @param toRelease The buffer to release.
- */
- void releaseBuffer(ByteBuffer toRelease);
-
- /**
- * Clone the entire state of the DataReader with the assumption that the
- * clone will be closed at a different time. Thus, any file handles in the
- * implementation need to be cloned.
- * @return a new instance
- */
- DataReader clone();
-
- @Override
- public void close() throws IOException;
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DateColumnStatistics.java b/orc/src/java/org/apache/orc/DateColumnStatistics.java
deleted file mode 100644
index cdd01af..0000000
--- a/orc/src/java/org/apache/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum value for the column.
- * @return minimum value
- */
- Date getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return maximum value
- */
- Date getMaximum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DecimalColumnStatistics.java b/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
deleted file mode 100644
index 51b6d7d..0000000
--- a/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
- /**
- * Get the minimum value for the column.
- * @return the minimum value
- */
- HiveDecimal getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return the maximum value
- */
- HiveDecimal getMaximum();
-
- /**
- * Get the sum of the values of the column.
- * @return the sum
- */
- HiveDecimal getSum();
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DoubleColumnStatistics.java b/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
deleted file mode 100644
index 00c728f..0000000
--- a/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
- /**
- * Get the smallest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the minimum
- */
- double getMinimum();
-
- /**
- * Get the largest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the maximum
- */
- double getMaximum();
-
- /**
- * Get the sum of the values in the column.
- * @return the sum
- */
- double getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/FileFormatException.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/FileFormatException.java b/orc/src/java/org/apache/orc/FileFormatException.java
deleted file mode 100644
index 2cebea7..0000000
--- a/orc/src/java/org/apache/orc/FileFormatException.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-
-/**
- * Thrown when an invalid file format is encountered.
- */
-public class FileFormatException extends IOException {
-
- public FileFormatException(String errMsg) {
- super(errMsg);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/FileMetadata.java b/orc/src/java/org/apache/orc/FileMetadata.java
deleted file mode 100644
index 807e696..0000000
--- a/orc/src/java/org/apache/orc/FileMetadata.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.util.List;
-
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-
-/**
- * Cached file metadata. Right now, it caches everything; we don't have to store all the
- * protobuf structs actually, we could just store what we need, but that would require that
- * ORC stop depending on them too. Luckily, they shouldn't be very big.
- */
-public interface FileMetadata {
- boolean isOriginalFormat();
-
- List<StripeInformation> getStripes();
-
- CompressionKind getCompressionKind();
-
- int getCompressionBufferSize();
-
- int getRowIndexStride();
-
- int getColumnCount();
-
- int getFlattenedColumnCount();
-
- Object getFileKey();
-
- List<Integer> getVersionList();
-
- int getMetadataSize();
-
- int getWriterVersionNum();
-
- List<OrcProto.Type> getTypes();
-
- List<OrcProto.StripeStatistics> getStripeStats();
-
- long getContentLength();
-
- long getNumberOfRows();
-
- List<OrcProto.ColumnStatistics> getFileStats();
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java b/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
deleted file mode 100644
index 1a162ff..0000000
--- a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for all of the integer columns, such as byte, short, int, and
- * long.
- */
-public interface IntegerColumnStatistics extends ColumnStatistics {
- /**
- * Get the smallest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the minimum
- */
- long getMinimum();
-
- /**
- * Get the largest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the maximum
- */
- long getMaximum();
-
- /**
- * Is the sum defined? If the sum overflowed the counter this will be false.
- * @return is the sum available
- */
- boolean isSumDefined();
-
- /**
- * Get the sum of the column. Only valid if isSumDefined returns true.
- * @return the sum of the column
- */
- long getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcConf.java b/orc/src/java/org/apache/orc/OrcConf.java
deleted file mode 100644
index 357318d..0000000
--- a/orc/src/java/org/apache/orc/OrcConf.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.conf.Configuration;
-
-import java.util.Properties;
-
-/**
- * Define the configuration properties that Orc understands.
- */
-public enum OrcConf {
- STRIPE_SIZE("orc.stripe.size", "hive.exec.orc.default.stripe.size",
- 64L * 1024 * 1024,
- "Define the default ORC stripe size, in bytes."),
- BLOCK_SIZE("orc.block.size", "hive.exec.orc.default.block.size",
- 256L * 1024 * 1024,
- "Define the default file system block size for ORC files."),
- ENABLE_INDEXES("orc.create.index", "orc.create.index", true,
- "Should the ORC writer create indexes as part of the file."),
- ROW_INDEX_STRIDE("orc.row.index.stride",
- "hive.exec.orc.default.row.index.stride", 10000,
- "Define the default ORC index stride in number of rows. (Stride is the\n"+
- " number of rows n index entry represents.)"),
- BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
- 256 * 1024, "Define the default ORC buffer size, in bytes."),
- BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
- "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
- BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
- true,
- "Define whether stripes should be padded to the HDFS block boundaries."),
- COMPRESS("orc.compress", "hive.exec.orc.default.compress", "ZLIB",
- "Define the default compression codec for ORC file"),
- WRITE_FORMAT("orc.write.format", "hive.exec.orc.write.format", "0.12",
- "Define the version of the file to write. Possible values are 0.11 and\n"+
- " 0.12. If this parameter is not defined, ORC will use the run\n" +
- " length encoding (RLE) introduced in Hive 0.12."),
- ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
- "SPEED",
- "Define the encoding strategy to use while writing data. Changing this\n"+
- "will only affect the light weight encoding for integers. This\n" +
- "flag will not change the compression level of higher level\n" +
- "compression codec (like ZLIB)."),
- COMPRESSION_STRATEGY("orc.compression.strategy",
- "hive.exec.orc.compression.strategy", "SPEED",
- "Define the compression strategy to use while writing data.\n" +
- "This changes the compression level of higher level compression\n" +
- "codec (like ZLIB)."),
- BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
- "hive.exec.orc.block.padding.tolerance", 0.05,
- "Define the tolerance for block padding as a decimal fraction of\n" +
- "stripe size (for example, the default value 0.05 is 5% of the\n" +
- "stripe size). For the defaults of 64Mb ORC stripe and 256Mb HDFS\n" +
- "blocks, the default block padding tolerance of 5% will\n" +
- "reserve a maximum of 3.2Mb for padding within the 256Mb block.\n" +
- "In that case, if the available size within the block is more than\n"+
- "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
- "space. This will make sure that no stripe written will block\n" +
- " boundaries and cause remote reads within a node local task."),
- BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
- "Define the default false positive probability for bloom filters."),
- USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
- "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
- SKIP_CORRUPT_DATA("orc.skip.corrupt.data", "hive.exec.orc.skip.corrupt.data",
- false,
- "If ORC reader encounters corrupt data, this value will be used to\n" +
- "determine whether to skip the corrupt data or throw exception.\n" +
- "The default behavior is to throw exception."),
- MEMORY_POOL("orc.memory.pool", "hive.exec.orc.memory.pool", 0.5,
- "Maximum fraction of heap that can be used by ORC file writers"),
- DICTIONARY_KEY_SIZE_THRESHOLD("orc.dictionary.key.threshold",
- "hive.exec.orc.dictionary.key.size.threshold",
- 0.8,
- "If the number of distinct keys in a dictionary is greater than this\n" +
- "fraction of the total number of non-null rows, turn off \n" +
- "dictionary encoding. Use 1 to always use dictionary encoding."),
- ROW_INDEX_STRIDE_DICTIONARY_CHECK("orc.dictionary.early.check",
- "hive.orc.row.index.stride.dictionary.check",
- true,
- "If enabled dictionary check will happen after first row index stride\n" +
- "(default 10000 rows) else dictionary check will happen before\n" +
- "writing first stripe. In both cases, the decision to use\n" +
- "dictionary or not will be retained thereafter."),
- BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns", "orc.bloom.filter.columns",
- "", "List of columns to create bloom filters for when writing.")
- ;
-
- private final String attribute;
- private final String hiveConfName;
- private final Object defaultValue;
- private final String description;
-
- OrcConf(String attribute,
- String hiveConfName,
- Object defaultValue,
- String description) {
- this.attribute = attribute;
- this.hiveConfName = hiveConfName;
- this.defaultValue = defaultValue;
- this.description = description;
- }
-
- public String getAttribute() {
- return attribute;
- }
-
- public String getHiveConfName() {
- return hiveConfName;
- }
-
- public Object getDefaultValue() {
- return defaultValue;
- }
-
- public String getDescription() {
- return description;
- }
-
- private String lookupValue(Properties tbl, Configuration conf) {
- String result = null;
- if (tbl != null) {
- result = tbl.getProperty(attribute);
- }
- if (result == null && conf != null) {
- result = conf.get(attribute);
- if (result == null) {
- result = conf.get(hiveConfName);
- }
- }
- return result;
- }
-
- public long getLong(Properties tbl, Configuration conf) {
- String value = lookupValue(tbl, conf);
- if (value != null) {
- return Long.parseLong(value);
- }
- return ((Number) defaultValue).longValue();
- }
-
- public long getLong(Configuration conf) {
- return getLong(null, conf);
- }
-
- public String getString(Properties tbl, Configuration conf) {
- String value = lookupValue(tbl, conf);
- return value == null ? (String) defaultValue : value;
- }
-
- public String getString(Configuration conf) {
- return getString(null, conf);
- }
-
- public boolean getBoolean(Properties tbl, Configuration conf) {
- String value = lookupValue(tbl, conf);
- if (value != null) {
- return Boolean.parseBoolean(value);
- }
- return (Boolean) defaultValue;
- }
-
- public boolean getBoolean(Configuration conf) {
- return getBoolean(null, conf);
- }
-
- public double getDouble(Properties tbl, Configuration conf) {
- String value = lookupValue(tbl, conf);
- if (value != null) {
- return Double.parseDouble(value);
- }
- return ((Number) defaultValue).doubleValue();
- }
-
- public double getDouble(Configuration conf) {
- return getDouble(null, conf);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java
deleted file mode 100644
index ddfa9f7..0000000
--- a/orc/src/java/org/apache/orc/OrcFile.java
+++ /dev/null
@@ -1,566 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.impl.MemoryManager;
-import org.apache.orc.impl.OrcTail;
-import org.apache.orc.impl.ReaderImpl;
-import org.apache.orc.impl.WriterImpl;
-
-/**
- * Contains factory methods to read or write ORC files.
- */
-public class OrcFile {
- public static final String MAGIC = "ORC";
-
- /**
- * Create a version number for the ORC file format, so that we can add
- * non-forward compatible changes in the future. To make it easier for users
- * to understand the version numbers, we use the Hive release number that
- * first wrote that version of ORC files.
- *
- * Thus, if you add new encodings or other non-forward compatible changes
- * to ORC files, which prevent the old reader from reading the new format,
- * you should change these variable to reflect the next Hive release number.
- * Non-forward compatible changes should never be added in patch releases.
- *
- * Do not make any changes that break backwards compatibility, which would
- * prevent the new reader from reading ORC files generated by any released
- * version of Hive.
- */
- public enum Version {
- V_0_11("0.11", 0, 11),
- V_0_12("0.12", 0, 12);
-
- public static final Version CURRENT = V_0_12;
-
- private final String name;
- private final int major;
- private final int minor;
-
- Version(String name, int major, int minor) {
- this.name = name;
- this.major = major;
- this.minor = minor;
- }
-
- public static Version byName(String name) {
- for(Version version: values()) {
- if (version.name.equals(name)) {
- return version;
- }
- }
- throw new IllegalArgumentException("Unknown ORC version " + name);
- }
-
- /**
- * Get the human readable name for the version.
- */
- public String getName() {
- return name;
- }
-
- /**
- * Get the major version number.
- */
- public int getMajor() {
- return major;
- }
-
- /**
- * Get the minor version number.
- */
- public int getMinor() {
- return minor;
- }
- }
-
- /**
- * Records the version of the writer in terms of which bugs have been fixed.
- * For bugs in the writer, but the old readers already read the new data
- * correctly, bump this version instead of the Version.
- */
- public enum WriterVersion {
- ORIGINAL(0),
- HIVE_8732(1), // corrupted stripe/file maximum column statistics
- HIVE_4243(2), // use real column names from Hive tables
- HIVE_12055(3), // vectorized writer
- HIVE_13083(4), // decimal writer updating present stream wrongly
-
- // Don't use any magic numbers here except for the below:
- FUTURE(Integer.MAX_VALUE); // a version from a future writer
-
- private final int id;
-
- public int getId() {
- return id;
- }
-
- WriterVersion(int id) {
- this.id = id;
- }
-
- private static final WriterVersion[] values;
- static {
- // Assumes few non-negative values close to zero.
- int max = Integer.MIN_VALUE;
- for (WriterVersion v : WriterVersion.values()) {
- if (v.id < 0) throw new AssertionError();
- if (v.id > max && FUTURE.id != v.id) {
- max = v.id;
- }
- }
- values = new WriterVersion[max + 1];
- for (WriterVersion v : WriterVersion.values()) {
- if (v.id < values.length) {
- values[v.id] = v;
- }
- }
- }
-
- public static WriterVersion from(int val) {
- if (val == FUTURE.id) return FUTURE; // Special handling for the magic value.
- return values[val];
- }
- }
- public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083;
-
- public enum EncodingStrategy {
- SPEED, COMPRESSION
- }
-
- public enum CompressionStrategy {
- SPEED, COMPRESSION
- }
-
- // unused
- protected OrcFile() {}
-
- public static class ReaderOptions {
- private final Configuration conf;
- private FileSystem filesystem;
- private long maxLength = Long.MAX_VALUE;
- private OrcTail orcTail;
- // TODO: We can generalize FileMetada interface. Make OrcTail implement FileMetadata interface
- // and remove this class altogether. Both footer caching and llap caching just needs OrcTail.
- // For now keeping this around to avoid complex surgery
- private FileMetadata fileMetadata;
-
- public ReaderOptions(Configuration conf) {
- this.conf = conf;
- }
-
- public ReaderOptions filesystem(FileSystem fs) {
- this.filesystem = fs;
- return this;
- }
-
- public ReaderOptions maxLength(long val) {
- maxLength = val;
- return this;
- }
-
- public ReaderOptions orcTail(OrcTail tail) {
- this.orcTail = tail;
- return this;
- }
-
- public Configuration getConfiguration() {
- return conf;
- }
-
- public FileSystem getFilesystem() {
- return filesystem;
- }
-
- public long getMaxLength() {
- return maxLength;
- }
-
- public OrcTail getOrcTail() {
- return orcTail;
- }
-
- public ReaderOptions fileMetadata(final FileMetadata metadata) {
- fileMetadata = metadata;
- return this;
- }
-
- public FileMetadata getFileMetadata() {
- return fileMetadata;
- }
- }
-
- public static ReaderOptions readerOptions(Configuration conf) {
- return new ReaderOptions(conf);
- }
-
- public static Reader createReader(Path path,
- ReaderOptions options) throws IOException {
- return new ReaderImpl(path, options);
- }
-
- public interface WriterContext {
- Writer getWriter();
- }
-
- public interface WriterCallback {
- void preStripeWrite(WriterContext context) throws IOException;
- void preFooterWrite(WriterContext context) throws IOException;
- }
-
- /**
- * Options for creating ORC file writers.
- */
- public static class WriterOptions {
- private final Configuration configuration;
- private FileSystem fileSystemValue = null;
- private TypeDescription schema = null;
- private long stripeSizeValue;
- private long blockSizeValue;
- private int rowIndexStrideValue;
- private int bufferSizeValue;
- private boolean enforceBufferSize = false;
- private boolean blockPaddingValue;
- private CompressionKind compressValue;
- private MemoryManager memoryManagerValue;
- private Version versionValue;
- private WriterCallback callback;
- private EncodingStrategy encodingStrategy;
- private CompressionStrategy compressionStrategy;
- private double paddingTolerance;
- private String bloomFilterColumns;
- private double bloomFilterFpp;
-
- protected WriterOptions(Properties tableProperties, Configuration conf) {
- configuration = conf;
- memoryManagerValue = getStaticMemoryManager(conf);
- stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
- blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
- rowIndexStrideValue =
- (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
- bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
- conf);
- blockPaddingValue =
- OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
- compressValue =
- CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
- conf).toUpperCase());
- String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
- conf);
- versionValue = Version.byName(versionName);
- String enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties,
- conf);
- encodingStrategy = EncodingStrategy.valueOf(enString);
-
- String compString =
- OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
- compressionStrategy = CompressionStrategy.valueOf(compString);
-
- paddingTolerance =
- OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
-
- bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
- conf);
- bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
- conf);
- }
-
- /**
- * Provide the filesystem for the path, if the client has it available.
- * If it is not provided, it will be found from the path.
- */
- public WriterOptions fileSystem(FileSystem value) {
- fileSystemValue = value;
- return this;
- }
-
- /**
- * Set the stripe size for the file. The writer stores the contents of the
- * stripe in memory until this memory limit is reached and the stripe
- * is flushed to the HDFS file and the next stripe started.
- */
- public WriterOptions stripeSize(long value) {
- stripeSizeValue = value;
- return this;
- }
-
- /**
- * Set the file system block size for the file. For optimal performance,
- * set the block size to be multiple factors of stripe size.
- */
- public WriterOptions blockSize(long value) {
- blockSizeValue = value;
- return this;
- }
-
- /**
- * Set the distance between entries in the row index. The minimum value is
- * 1000 to prevent the index from overwhelming the data. If the stride is
- * set to 0, no indexes will be included in the file.
- */
- public WriterOptions rowIndexStride(int value) {
- rowIndexStrideValue = value;
- return this;
- }
-
- /**
- * The size of the memory buffers used for compressing and storing the
- * stripe in memory. NOTE: ORC writer may choose to use smaller buffer
- * size based on stripe size and number of columns for efficient stripe
- * writing and memory utilization. To enforce writer to use the requested
- * buffer size use enforceBufferSize().
- */
- public WriterOptions bufferSize(int value) {
- bufferSizeValue = value;
- return this;
- }
-
- /**
- * Enforce writer to use requested buffer size instead of estimating
- * buffer size based on stripe size and number of columns.
- * See bufferSize() method for more info.
- * Default: false
- */
- public WriterOptions enforceBufferSize() {
- enforceBufferSize = true;
- return this;
- }
-
- /**
- * Sets whether the HDFS blocks are padded to prevent stripes from
- * straddling blocks. Padding improves locality and thus the speed of
- * reading, but costs space.
- */
- public WriterOptions blockPadding(boolean value) {
- blockPaddingValue = value;
- return this;
- }
-
- /**
- * Sets the encoding strategy that is used to encode the data.
- */
- public WriterOptions encodingStrategy(EncodingStrategy strategy) {
- encodingStrategy = strategy;
- return this;
- }
-
- /**
- * Sets the tolerance for block padding as a percentage of stripe size.
- */
- public WriterOptions paddingTolerance(double value) {
- paddingTolerance = value;
- return this;
- }
-
- /**
- * Comma separated values of column names for which bloom filter is to be created.
- */
- public WriterOptions bloomFilterColumns(String columns) {
- bloomFilterColumns = columns;
- return this;
- }
-
- /**
- * Specify the false positive probability for bloom filter.
- * @param fpp - false positive probability
- * @return this
- */
- public WriterOptions bloomFilterFpp(double fpp) {
- bloomFilterFpp = fpp;
- return this;
- }
-
- /**
- * Sets the generic compression that is used to compress the data.
- */
- public WriterOptions compress(CompressionKind value) {
- compressValue = value;
- return this;
- }
-
- /**
- * Set the schema for the file. This is a required parameter.
- * @param schema the schema for the file.
- * @return this
- */
- public WriterOptions setSchema(TypeDescription schema) {
- this.schema = schema;
- return this;
- }
-
- /**
- * Sets the version of the file that will be written.
- */
- public WriterOptions version(Version value) {
- versionValue = value;
- return this;
- }
-
- /**
- * Add a listener for when the stripe and file are about to be closed.
- * @param callback the object to be called when the stripe is closed
- * @return this
- */
- public WriterOptions callback(WriterCallback callback) {
- this.callback = callback;
- return this;
- }
-
- /**
- * A package local option to set the memory manager.
- */
- protected WriterOptions memory(MemoryManager value) {
- memoryManagerValue = value;
- return this;
- }
-
- public boolean getBlockPadding() {
- return blockPaddingValue;
- }
-
- public long getBlockSize() {
- return blockSizeValue;
- }
-
- public String getBloomFilterColumns() {
- return bloomFilterColumns;
- }
-
- public FileSystem getFileSystem() {
- return fileSystemValue;
- }
-
- public Configuration getConfiguration() {
- return configuration;
- }
-
- public TypeDescription getSchema() {
- return schema;
- }
-
- public long getStripeSize() {
- return stripeSizeValue;
- }
-
- public CompressionKind getCompress() {
- return compressValue;
- }
-
- public WriterCallback getCallback() {
- return callback;
- }
-
- public Version getVersion() {
- return versionValue;
- }
-
- public MemoryManager getMemoryManager() {
- return memoryManagerValue;
- }
-
- public int getBufferSize() {
- return bufferSizeValue;
- }
-
- public boolean isEnforceBufferSize() {
- return enforceBufferSize;
- }
-
- public int getRowIndexStride() {
- return rowIndexStrideValue;
- }
-
- public CompressionStrategy getCompressionStrategy() {
- return compressionStrategy;
- }
-
- public EncodingStrategy getEncodingStrategy() {
- return encodingStrategy;
- }
-
- public double getPaddingTolerance() {
- return paddingTolerance;
- }
-
- public double getBloomFilterFpp() {
- return bloomFilterFpp;
- }
- }
-
- /**
- * Create a set of writer options based on a configuration.
- * @param conf the configuration to use for values
- * @return A WriterOptions object that can be modified
- */
- public static WriterOptions writerOptions(Configuration conf) {
- return new WriterOptions(null, conf);
- }
-
- /**
- * Create a set of write options based on a set of table properties and
- * configuration.
- * @param tableProperties the properties of the table
- * @param conf the configuration of the query
- * @return a WriterOptions object that can be modified
- */
- public static WriterOptions writerOptions(Properties tableProperties,
- Configuration conf) {
- return new WriterOptions(tableProperties, conf);
- }
-
- private static ThreadLocal<MemoryManager> memoryManager = null;
-
- private static synchronized MemoryManager getStaticMemoryManager(
- final Configuration conf) {
- if (memoryManager == null) {
- memoryManager = new ThreadLocal<MemoryManager>() {
- @Override
- protected MemoryManager initialValue() {
- return new MemoryManager(conf);
- }
- };
- }
- return memoryManager.get();
- }
-
- /**
- * Create an ORC file writer. This is the public interface for creating
- * writers going forward and new options will only be added to this method.
- * @param path filename to write to
- * @param opts the options
- * @return a new ORC file writer
- * @throws IOException
- */
- public static Writer createWriter(Path path,
- WriterOptions opts
- ) throws IOException {
- FileSystem fs = opts.getFileSystem() == null ?
- path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
-
- return new WriterImpl(fs, path, opts);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcUtils.java b/orc/src/java/org/apache/orc/OrcUtils.java
deleted file mode 100644
index 4f02926..0000000
--- a/orc/src/java/org/apache/orc/OrcUtils.java
+++ /dev/null
@@ -1,624 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.orc.OrcProto.Type.Builder;
-import org.apache.orc.impl.ReaderImpl;
-
-import com.google.common.collect.Lists;
-
-public class OrcUtils {
-
- /**
- * Returns selected columns as a boolean array with true value set for specified column names.
- * The result will contain number of elements equal to flattened number of columns.
- * For example:
- * selectedColumns - a,b,c
- * allColumns - a,b,c,d
- * If column c is a complex type, say list<string> and other types are primitives then result will
- * be [false, true, true, true, true, true, false]
- * Index 0 is the root element of the struct which is set to false by default, index 1,2
- * corresponds to columns a and b. Index 3,4 correspond to column c which is list<string> and
- * index 5 correspond to column d. After flattening list<string> gets 2 columns.
- *
- * @param selectedColumns - comma separated list of selected column names
- * @param schema - object schema
- * @return - boolean array with true value set for the specified column names
- */
- public static boolean[] includeColumns(String selectedColumns,
- TypeDescription schema) {
- int numFlattenedCols = schema.getMaximumId();
- boolean[] results = new boolean[numFlattenedCols + 1];
- if ("*".equals(selectedColumns)) {
- Arrays.fill(results, true);
- return results;
- }
- if (selectedColumns != null &&
- schema.getCategory() == TypeDescription.Category.STRUCT) {
- List<String> fieldNames = schema.getFieldNames();
- List<TypeDescription> fields = schema.getChildren();
- for (String column: selectedColumns.split((","))) {
- TypeDescription col = findColumn(column, fieldNames, fields);
- if (col != null) {
- for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
- results[i] = true;
- }
- }
- }
- }
- return results;
- }
-
- private static TypeDescription findColumn(String columnName,
- List<String> fieldNames,
- List<TypeDescription> fields) {
- int i = 0;
- for(String fieldName: fieldNames) {
- if (fieldName.equalsIgnoreCase(columnName)) {
- return fields.get(i);
- } else {
- i += 1;
- }
- }
- return null;
- }
-
- public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
- List<OrcProto.Type> result = Lists.newArrayList();
- appendOrcTypes(result, typeDescr);
- return result;
- }
-
- private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
- OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- List<TypeDescription> children = typeDescr.getChildren();
- switch (typeDescr.getCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- type.setKind(OrcProto.Type.Kind.CHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case VARCHAR:
- type.setKind(OrcProto.Type.Kind.VARCHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(typeDescr.getPrecision());
- type.setScale(typeDescr.getScale());
- break;
- case LIST:
- type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(children.get(0).getId());
- break;
- case MAP:
- type.setKind(OrcProto.Type.Kind.MAP);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- case STRUCT:
- type.setKind(OrcProto.Type.Kind.STRUCT);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- for(String field: typeDescr.getFieldNames()) {
- type.addFieldNames(field);
- }
- break;
- case UNION:
- type.setKind(OrcProto.Type.Kind.UNION);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " +
- typeDescr.getCategory());
- }
- result.add(type.build());
- if (children != null) {
- for(TypeDescription child: children) {
- appendOrcTypes(result, child);
- }
- }
- }
-
- /**
- * NOTE: This method ignores the subtype numbers in the TypeDescription rebuilds the subtype
- * numbers based on the length of the result list being appended.
- *
- * @param result
- * @param typeDescr
- */
- public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
- TypeDescription typeDescr) {
-
- int subtype = result.size();
- OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- boolean needsAdd = true;
- List<TypeDescription> children = typeDescr.getChildren();
- switch (typeDescr.getCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- type.setKind(OrcProto.Type.Kind.CHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case VARCHAR:
- type.setKind(OrcProto.Type.Kind.VARCHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(typeDescr.getPrecision());
- type.setScale(typeDescr.getScale());
- break;
- case LIST:
- type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(++subtype);
- result.add(type.build());
- needsAdd = false;
- appendOrcTypesRebuildSubtypes(result, children.get(0));
- break;
- case MAP:
- {
- // Make room for MAP type.
- result.add(null);
-
- // Add MAP type pair in order to determine their subtype values.
- appendOrcTypesRebuildSubtypes(result, children.get(0));
- int subtype2 = result.size();
- appendOrcTypesRebuildSubtypes(result, children.get(1));
- type.setKind(OrcProto.Type.Kind.MAP);
- type.addSubtypes(subtype + 1);
- type.addSubtypes(subtype2);
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- case STRUCT:
- {
- List<String> fieldNames = typeDescr.getFieldNames();
-
- // Make room for STRUCT type.
- result.add(null);
-
- List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
- for(TypeDescription child: children) {
- int fieldSubtype = result.size();
- fieldSubtypes.add(fieldSubtype);
- appendOrcTypesRebuildSubtypes(result, child);
- }
-
- type.setKind(OrcProto.Type.Kind.STRUCT);
-
- for (int i = 0 ; i < fieldNames.size(); i++) {
- type.addSubtypes(fieldSubtypes.get(i));
- type.addFieldNames(fieldNames.get(i));
- }
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- case UNION:
- {
- // Make room for UNION type.
- result.add(null);
-
- List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
- for(TypeDescription child: children) {
- int unionSubtype = result.size();
- unionSubtypes.add(unionSubtype);
- appendOrcTypesRebuildSubtypes(result, child);
- }
-
- type.setKind(OrcProto.Type.Kind.UNION);
- for (int i = 0 ; i < children.size(); i++) {
- type.addSubtypes(unionSubtypes.get(i));
- }
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
- }
- if (needsAdd) {
- result.add(type.build());
- }
- }
-
- /**
- * NOTE: This method ignores the subtype numbers in the OrcProto.Type rebuilds the subtype
- * numbers based on the length of the result list being appended.
- *
- * @param result
- * @param types
- * @param columnId
- */
- public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
- List<OrcProto.Type> types, int columnId) {
-
- OrcProto.Type oldType = types.get(columnId++);
-
- int subtype = result.size();
- OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
- boolean needsAdd = true;
- switch (oldType.getKind()) {
- case BOOLEAN:
- builder.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- builder.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- builder.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- builder.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- builder.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- builder.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- builder.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- builder.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- builder.setKind(OrcProto.Type.Kind.CHAR);
- builder.setMaximumLength(oldType.getMaximumLength());
- break;
- case VARCHAR:
- builder.setKind(OrcProto.Type.Kind.VARCHAR);
- builder.setMaximumLength(oldType.getMaximumLength());
- break;
- case BINARY:
- builder.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- builder.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- builder.setKind(OrcProto.Type.Kind.DECIMAL);
- builder.setPrecision(oldType.getPrecision());
- builder.setScale(oldType.getScale());
- break;
- case LIST:
- builder.setKind(OrcProto.Type.Kind.LIST);
- builder.addSubtypes(++subtype);
- result.add(builder.build());
- needsAdd = false;
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- break;
- case MAP:
- {
- // Make room for MAP type.
- result.add(null);
-
- // Add MAP type pair in order to determine their subtype values.
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- int subtype2 = result.size();
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- builder.setKind(OrcProto.Type.Kind.MAP);
- builder.addSubtypes(subtype + 1);
- builder.addSubtypes(subtype2);
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- case STRUCT:
- {
- List<String> fieldNames = oldType.getFieldNamesList();
-
- // Make room for STRUCT type.
- result.add(null);
-
- List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
- for(int i = 0 ; i < fieldNames.size(); i++) {
- int fieldSubtype = result.size();
- fieldSubtypes.add(fieldSubtype);
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- }
-
- builder.setKind(OrcProto.Type.Kind.STRUCT);
-
- for (int i = 0 ; i < fieldNames.size(); i++) {
- builder.addSubtypes(fieldSubtypes.get(i));
- builder.addFieldNames(fieldNames.get(i));
- }
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- case UNION:
- {
- int subtypeCount = oldType.getSubtypesCount();
-
- // Make room for UNION type.
- result.add(null);
-
- List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
- for(int i = 0 ; i < subtypeCount; i++) {
- int unionSubtype = result.size();
- unionSubtypes.add(unionSubtype);
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- }
-
- builder.setKind(OrcProto.Type.Kind.UNION);
- for (int i = 0 ; i < subtypeCount; i++) {
- builder.addSubtypes(unionSubtypes.get(i));
- }
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
- }
- if (needsAdd) {
- result.add(builder.build());
- }
- return columnId;
- }
-
- /**
- * Translate the given rootColumn from the list of types to a TypeDescription.
- * @param types all of the types
- * @param rootColumn translate this type
- * @return a new TypeDescription that matches the given rootColumn
- */
- public static
- TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
- int rootColumn) {
- OrcProto.Type type = types.get(rootColumn);
- switch (type.getKind()) {
- case BOOLEAN:
- return TypeDescription.createBoolean();
- case BYTE:
- return TypeDescription.createByte();
- case SHORT:
- return TypeDescription.createShort();
- case INT:
- return TypeDescription.createInt();
- case LONG:
- return TypeDescription.createLong();
- case FLOAT:
- return TypeDescription.createFloat();
- case DOUBLE:
- return TypeDescription.createDouble();
- case STRING:
- return TypeDescription.createString();
- case CHAR:
- case VARCHAR: {
- TypeDescription result = type.getKind() == OrcProto.Type.Kind.CHAR ?
- TypeDescription.createChar() : TypeDescription.createVarchar();
- if (type.hasMaximumLength()) {
- result.withMaxLength(type.getMaximumLength());
- }
- return result;
- }
- case BINARY:
- return TypeDescription.createBinary();
- case TIMESTAMP:
- return TypeDescription.createTimestamp();
- case DATE:
- return TypeDescription.createDate();
- case DECIMAL: {
- TypeDescription result = TypeDescription.createDecimal();
- if (type.hasScale()) {
- result.withScale(type.getScale());
- }
- if (type.hasPrecision()) {
- result.withPrecision(type.getPrecision());
- }
- return result;
- }
- case LIST:
- return TypeDescription.createList(
- convertTypeFromProtobuf(types, type.getSubtypes(0)));
- case MAP:
- return TypeDescription.createMap(
- convertTypeFromProtobuf(types, type.getSubtypes(0)),
- convertTypeFromProtobuf(types, type.getSubtypes(1)));
- case STRUCT: {
- TypeDescription result = TypeDescription.createStruct();
- for(int f=0; f < type.getSubtypesCount(); ++f) {
- result.addField(type.getFieldNames(f),
- convertTypeFromProtobuf(types, type.getSubtypes(f)));
- }
- return result;
- }
- case UNION: {
- TypeDescription result = TypeDescription.createUnion();
- for(int f=0; f < type.getSubtypesCount(); ++f) {
- result.addUnionChild(
- convertTypeFromProtobuf(types, type.getSubtypes(f)));
- }
- return result;
- }
- }
- throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
- }
-
- public static List<StripeInformation> convertProtoStripesToStripes(
- List<OrcProto.StripeInformation> stripes) {
- List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
- for (OrcProto.StripeInformation info : stripes) {
- result.add(new ReaderImpl.StripeInformationImpl(info));
- }
- return result;
- }
-
- public static List<TypeDescription> setTypeBuilderFromSchema(
- OrcProto.Type.Builder type, TypeDescription schema) {
- List<TypeDescription> children = schema.getChildren();
- switch (schema.getCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- type.setKind(OrcProto.Type.Kind.CHAR);
- type.setMaximumLength(schema.getMaxLength());
- break;
- case VARCHAR:
- type.setKind(OrcProto.Type.Kind.VARCHAR);
- type.setMaximumLength(schema.getMaxLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(schema.getPrecision());
- type.setScale(schema.getScale());
- break;
- case LIST:
- type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(children.get(0).getId());
- break;
- case MAP:
- type.setKind(OrcProto.Type.Kind.MAP);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- case STRUCT:
- type.setKind(OrcProto.Type.Kind.STRUCT);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- for(String field: schema.getFieldNames()) {
- type.addFieldNames(field);
- }
- break;
- case UNION:
- type.setKind(OrcProto.Type.Kind.UNION);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " +
- schema.getCategory());
- }
- return children;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
deleted file mode 100644
index c2d5235..0000000
--- a/orc/src/java/org/apache/orc/Reader.java
+++ /dev/null
@@ -1,375 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-
-/**
- * The interface for reading ORC files.
- *
- * One Reader can support multiple concurrent RecordReaders.
- */
-public interface Reader {
-
- /**
- * Get the number of rows in the file.
- * @return the number of rows
- */
- long getNumberOfRows();
-
- /**
- * Get the deserialized data size of the file.
- * @return raw data size
- */
- long getRawDataSize();
-
- /**
- * Get the deserialized data size of the specified columns.
- * @param colNames the names of the columns to measure
- * @return raw data size of columns
- */
- long getRawDataSizeOfColumns(List<String> colNames);
-
- /**
- * Get the deserialized data size of the specified column ids.
- * @param colIds internal column ids (check orcfiledump for column ids)
- * @return raw data size of columns
- */
- long getRawDataSizeFromColIndices(List<Integer> colIds);
-
- /**
- * Get the user metadata keys.
- * @return the set of metadata keys
- */
- List<String> getMetadataKeys();
-
- /**
- * Get a user metadata value.
- * @param key a key given by the user
- * @return the bytes associated with the given key
- */
- ByteBuffer getMetadataValue(String key);
-
- /**
- * Did the user set the given metadata value?
- * @param key the key to check
- * @return true if the metadata value was set
- */
- boolean hasMetadataValue(String key);
-
- /**
- * Get the compression kind.
- * @return the kind of compression in the file
- */
- CompressionKind getCompressionKind();
-
- /**
- * Get the buffer size for the compression.
- * @return number of bytes to buffer for the compression codec.
- */
- int getCompressionSize();
-
- /**
- * Get the number of rows per entry in the row index.
- * @return the number of rows per entry in the row index or 0 if there
- * is no row index.
- */
- int getRowIndexStride();
-
- /**
- * Get the list of stripes.
- * @return the information about the stripes in order
- */
- List<StripeInformation> getStripes();
-
- /**
- * Get the length of the file.
- * @return the number of bytes in the file
- */
- long getContentLength();
-
- /**
- * Get the statistics about the columns in the file.
- * @return the information about the columns
- */
- ColumnStatistics[] getStatistics();
-
- /**
- * Get the type of rows in this ORC file.
- */
- TypeDescription getSchema();
-
- /**
- * Get the list of types contained in the file. The root type is the first
- * type in the list.
- * @return the list of flattened types
- * @deprecated use {@link #getSchema()} instead
- */
- List<OrcProto.Type> getTypes();
-
- /**
- * Get the file format version.
- */
- OrcFile.Version getFileVersion();
-
- /**
- * Get the version of the writer of this file.
- */
- OrcFile.WriterVersion getWriterVersion();
-
- /**
- * Get the file tail (footer + postscript).
- *
- * @return the file tail
- */
- OrcProto.FileTail getFileTail();
-
- /**
- * Options for creating a RecordReader.
- */
- public static class Options {
- private boolean[] include;
- private long offset = 0;
- private long length = Long.MAX_VALUE;
- private SearchArgument sarg = null;
- private String[] columnNames = null;
- private Boolean useZeroCopy = null; // stays null until useZeroCopy(boolean) is called
- private Boolean skipCorruptRecords = null; // stays null until skipCorruptRecords(boolean) is called
- private TypeDescription schema = null;
- private DataReader dataReader = null;
-
- /**
- * Set the list of columns to read.
- * @param include a list of columns to read
- * @return this
- */
- public Options include(boolean[] include) {
- this.include = include;
- return this;
- }
-
- /**
- * Set the range of bytes to read.
- * @param offset the starting byte offset
- * @param length the number of bytes to read
- * @return this
- */
- public Options range(long offset, long length) {
- this.offset = offset;
- this.length = length;
- return this;
- }
-
- /**
- * Set the schema-on-read type description.
- */
- public Options schema(TypeDescription schema) {
- this.schema = schema;
- return this;
- }
-
- /**
- * Set search argument for predicate push down.
- * @param sarg the search argument
- * @param columnNames the column names that accompany the search argument
- * @return this
- */
- public Options searchArgument(SearchArgument sarg, String[] columnNames) {
- this.sarg = sarg;
- this.columnNames = columnNames;
- return this;
- }
-
- /**
- * Set whether to use zero copy from HDFS.
- * @param value the new zero copy flag
- * @return this
- */
- public Options useZeroCopy(boolean value) {
- this.useZeroCopy = value;
- return this;
- }
-
- public Options dataReader(DataReader value) { // stores the DataReader handed back by getDataReader()
- this.dataReader = value;
- return this;
- }
-
- /**
- * Set whether to skip corrupt records.
- * @param value the new skip corrupt records flag
- * @return this
- */
- public Options skipCorruptRecords(boolean value) {
- this.skipCorruptRecords = value;
- return this;
- }
-
- public boolean[] getInclude() {
- return include;
- }
-
- public long getOffset() {
- return offset;
- }
-
- public long getLength() {
- return length;
- }
-
- public TypeDescription getSchema() {
- return schema;
- }
-
- public SearchArgument getSearchArgument() {
- return sarg;
- }
-
- public String[] getColumnNames() {
- return columnNames;
- }
-
- public long getMaxOffset() {
- long result = offset + length;
- if (result < 0) { // a negative sum is treated as overflow (length defaults to Long.MAX_VALUE) and clamped
- result = Long.MAX_VALUE;
- }
- return result;
- }
-
- public Boolean getUseZeroCopy() {
- return useZeroCopy;
- }
-
- public Boolean getSkipCorruptRecords() {
- return skipCorruptRecords;
- }
-
- public DataReader getDataReader() {
- return dataReader;
- }
-
- public Options clone() {
- Options result = new Options();
- result.include = include; // the array reference is shared with the copy, not duplicated
- result.offset = offset;
- result.length = length;
- result.sarg = sarg;
- result.schema = schema;
- result.columnNames = columnNames;
- result.useZeroCopy = useZeroCopy;
- result.skipCorruptRecords = skipCorruptRecords;
- result.dataReader = dataReader == null ? null : dataReader.clone();
- return result;
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- buffer.append("{include: ");
- if (include == null) {
- buffer.append("null");
- } else {
- buffer.append("[");
- for(int i=0; i < include.length; ++i) {
- if (i != 0) {
- buffer.append(", ");
- }
- buffer.append(include[i]);
- }
- buffer.append("]");
- }
- buffer.append(", offset: ");
- buffer.append(offset);
- buffer.append(", length: ");
- buffer.append(length);
- if (sarg != null) {
- buffer.append(", sarg: ");
- buffer.append(sarg.toString());
- buffer.append(", columns: [");
- for(int i=0; i < columnNames.length; ++i) { // NOTE(review): throws NullPointerException if sarg is set but columnNames is null
- if (i != 0) {
- buffer.append(", ");
- }
- buffer.append("'");
- buffer.append(columnNames[i]);
- buffer.append("'");
- }
- buffer.append("]");
- }
- if (schema != null) {
- buffer.append(", schema: ");
- schema.printToBuffer(buffer);
- }
- buffer.append("}");
- return buffer.toString();
- }
- }
-
- /**
- * Create a RecordReader that reads everything with the default options.
- * @return a new RecordReader
- * @throws IOException
- */
- RecordReader rows() throws IOException;
-
- /**
- * Create a RecordReader that uses the options given.
- * NOTE(review): an older remark said this method could not be named rows because many
- * callers used rows(null) before rows() existed; it is declared as rows here - confirm.
- * @param options the options to read with
- * @return a new RecordReader
- * @throws IOException
- */
- RecordReader rows(Options options) throws IOException;
-
- /**
- * @return List of integers representing version of the file, in order from major to minor.
- */
- List<Integer> getVersionList();
-
- /**
- * @return the size of the metadata, in bytes.
- */
- int getMetadataSize();
-
- /**
- * @return Stripe statistics, in original protobuf form.
- */
- List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
-
- /**
- * @return Stripe statistics.
- */
- List<StripeStatistics> getStripeStatistics() throws IOException;
-
- /**
- * @return File statistics, in original protobuf form.
- */
- List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
-
- /**
- * @return Serialized file metadata read from disk for the purposes of caching, etc.
- */
- ByteBuffer getSerializedFileFooter();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/RecordReader.java b/orc/src/java/org/apache/orc/RecordReader.java
deleted file mode 100644
index 09ba0f0..0000000
--- a/orc/src/java/org/apache/orc/RecordReader.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * An iterator over the rows of an ORC file, read one batch at a time.
- */
-public interface RecordReader {
- /**
- * Read the next row batch. The size of the batch to read cannot be
- * controlled by the callers. Callers need to look at
- * VectorizedRowBatch.size of the returned object to know the batch
- * size read.
- * @param batch a row batch object to read into
- * @return were more rows available to read?
- * @throws java.io.IOException
- */
- boolean nextBatch(VectorizedRowBatch batch) throws IOException;
-
- /**
- * Get the row number of the row that will be returned by the following
- * call to {@link #nextBatch} (presumably the first row of that batch - confirm).
- * @return the row number from 0 to the number of rows in the file
- * @throws java.io.IOException
- */
- long getRowNumber() throws IOException;
-
- /**
- * Get the progress of the reader through the rows.
- * @return a fraction between 0.0 and 1.0 of rows read
- * @throws java.io.IOException
- */
- float getProgress() throws IOException;
-
- /**
- * Release the resources associated with the given reader.
- * @throws java.io.IOException
- */
- void close() throws IOException;
-
- /**
- * Seek to a particular row number, passed as rowCount.
- */
- void seekToRow(long rowCount) throws IOException;
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StringColumnStatistics.java b/orc/src/java/org/apache/orc/StringColumnStatistics.java
deleted file mode 100644
index 5a868d0..0000000
--- a/orc/src/java/org/apache/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum string.
- * @return the minimum
- */
- String getMinimum();
-
- /**
- * Get the maximum string.
- * @return the maximum
- */
- String getMaximum();
-
- /**
- * Get the total length of all strings.
- * @return the sum of the string lengths (total length)
- */
- long getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeInformation.java b/orc/src/java/org/apache/orc/StripeInformation.java
deleted file mode 100644
index 38f7eba..0000000
--- a/orc/src/java/org/apache/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
- /**
- * Get the byte offset of the start of the stripe.
- * @return the bytes from the start of the file
- */
- long getOffset();
-
- /**
- * Get the total length of the stripe in bytes.
- * @return the number of bytes in the stripe
- */
- long getLength();
-
- /**
- * Get the length of the stripe's indexes.
- * @return the number of bytes in the index
- */
- long getIndexLength();
-
- /**
- * Get the length of the stripe's data.
- * @return the number of bytes in the stripe's data
- */
- long getDataLength();
-
- /**
- * Get the length of the stripe's tail section, which contains its index.
- * @return the number of bytes in the tail (NOTE(review): reported separately from getIndexLength() - confirm what "index" means here)
- */
- long getFooterLength();
-
- /**
- * Get the number of rows in the stripe.
- * @return a count of the number of rows
- */
- long getNumberOfRows();
-}