You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2013/03/05 21:44:52 UTC
svn commit: r1452992 [3/8] - in /hive/trunk: ./ ivy/ ql/
ql/src/gen/protobuf/ ql/src/gen/protobuf/gen-java/
ql/src/gen/protobuf/gen-java/org/ ql/src/gen/protobuf/gen-java/org/apache/
ql/src/gen/protobuf/gen-java/org/apache/hadoop/ ql/src/gen/protobuf/g...
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * Reads fixed-width bit fields from the bytes supplied by a
+ * RunLengthByteReader. Within each byte the most significant unread bits
+ * are handed out first, and a single field may span several bytes.
+ */
+class BitFieldReader {
+  private RunLengthByteReader input;
+  private final int bitSize;
+  // the byte currently being consumed
+  private int current;
+  // how many low-order bits of current have not been handed out yet
+  private int bitsLeft;
+  // keeps only the low bitSize bits of a result
+  private final int mask;
+
+  /**
+   * Create a reader over the given stream.
+   * @param input the stream, which is wrapped in a RunLengthByteReader
+   * @param bitSize the width in bits of each value returned by next()
+   * @throws IOException
+   */
+  BitFieldReader(InStream input,
+                 int bitSize) throws IOException {
+    this.input = new RunLengthByteReader(input);
+    this.bitSize = bitSize;
+    mask = (1 << bitSize) - 1;
+  }
+
+  /**
+   * Load the next byte from the input and mark all 8 of its bits unread.
+   * @throws EOFException if the input has no more bytes
+   */
+  private void readByte() throws IOException {
+    if (input.hasNext()) {
+      current = 0xff & input.next();
+      bitsLeft = 8;
+    } else {
+      throw new EOFException("Read past end of bit field from " + input);
+    }
+  }
+
+  /**
+   * Read the next bitSize-bit value.
+   * @return the value, masked to bitSize bits
+   * @throws IOException if the input runs out of bytes
+   */
+  int next() throws IOException {
+    int result = 0;
+    int bitsLeftToRead = bitSize;
+    // use up whatever remains of the current byte, then pull in new bytes
+    // until the rest of the value fits inside a single byte
+    while (bitsLeftToRead > bitsLeft) {
+      result <<= bitsLeft;
+      result |= current & ((1 << bitsLeft) - 1);
+      bitsLeftToRead -= bitsLeft;
+      readByte();
+    }
+    if (bitsLeftToRead > 0) {
+      result <<= bitsLeftToRead;
+      bitsLeft -= bitsLeftToRead;
+      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+    }
+    return result & mask;
+  }
+
+  /**
+   * Reposition the reader. The byte reader is positioned first; the next
+   * entry of the index is then taken as the number of bits of that byte
+   * that have already been consumed.
+   * @param index the provider of the recorded positions
+   * @throws IOException
+   * @throws IllegalArgumentException if more than 8 bits are reported as
+   *   consumed
+   */
+  void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed > 8) {
+      throw new IllegalArgumentException("Seek past end of byte at " +
+          consumed + " in " + input);
+    } else if (consumed != 0) {
+      readByte();
+      bitsLeft = 8 - consumed;
+    } else {
+      bitsLeft = 0;
+    }
+  }
+
+  /**
+   * Skip over the given number of values.
+   * @param items the number of bitSize-bit values to skip
+   * @throws IOException if the skip ends in the middle of a byte that
+   *   does not exist
+   */
+  void skip(long items) throws IOException {
+    long totalBits = bitSize * items;
+    if (bitsLeft >= totalBits) {
+      bitsLeft -= totalBits;
+    } else {
+      totalBits -= bitsLeft;
+      input.skip(totalBits / 8);
+      int partialBits = (int) (totalBits % 8);
+      if (partialBits == 0) {
+        // The skip ended exactly on a byte boundary. Do not touch the
+        // following byte: it may not exist when the last values of the
+        // stream were skipped, and next() loads it on demand anyway.
+        bitsLeft = 0;
+      } else {
+        // go through readByte() so the byte is masked to 0..255 and the
+        // end of the stream is reported the same way as in next()
+        readByte();
+        bitsLeft = 8 - partialBits;
+      }
+    }
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+
+/**
+ * Packs fixed-width bit fields into bytes and hands the completed bytes to
+ * a RunLengthByteWriter. Values fill each byte starting from its most
+ * significant bits, and a single value may span several bytes.
+ */
+class BitFieldWriter {
+  private RunLengthByteWriter output;
+  private final int bitSize;
+  // keeps only the low bitSize bits of a value handed to write()
+  private final int mask;
+  // the byte currently being assembled
+  private byte current = 0;
+  // how many low-order bits of current are still free
+  private int bitsLeft = 8;
+
+  /**
+   * Create a writer on top of the given stream.
+   * @param output the stream, which is wrapped in a RunLengthByteWriter
+   * @param bitSize the width in bits of each value passed to write()
+   * @throws IOException
+   */
+  BitFieldWriter(PositionedOutputStream output,
+                 int bitSize) throws IOException {
+    this.output = new RunLengthByteWriter(output);
+    this.bitSize = bitSize;
+    // Java shift counts wrap at 32, so (1 << 32) - 1 would be 0
+    mask = bitSize >= 32 ? -1 : (1 << bitSize) - 1;
+  }
+
+  /**
+   * Emit the byte being assembled and start a fresh, empty one.
+   */
+  private void writeByte() throws IOException {
+    output.write(current);
+    current = 0;
+    bitsLeft = 8;
+  }
+
+  /**
+   * Write out any partially filled byte and flush the underlying writer.
+   * @throws IOException
+   */
+  void flush() throws IOException {
+    if (bitsLeft != 8) {
+      writeByte();
+    }
+    output.flush();
+  }
+
+  /**
+   * Append one value.
+   * @param value the value to write; only its low bitSize bits are used
+   * @throws IOException
+   */
+  void write(int value) throws IOException {
+    // Discard anything above bitSize bits. Without this, a value that is
+    // too wide would be OR-ed over bits already packed for earlier values.
+    value &= mask;
+    int bitsToWrite = bitSize;
+    while (bitsToWrite > bitsLeft) {
+      // add the bits to the bottom of the current word
+      current |= value >>> (bitsToWrite - bitsLeft);
+      // subtract out the bits we just added
+      bitsToWrite -= bitsLeft;
+      // zero out the bits above bitsToWrite
+      value &= (1 << bitsToWrite) - 1;
+      writeByte();
+    }
+    bitsLeft -= bitsToWrite;
+    current |= value << bitsLeft;
+    if (bitsLeft == 0) {
+      writeByte();
+    }
+  }
+
+  /**
+   * Record the current position: the position of the underlying writer
+   * followed by the number of bits already used in the byte being
+   * assembled.
+   * @param recorder the recorder to add the positions to
+   * @throws IOException
+   */
+  void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(8 - bitsLeft);
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Column statistics specific to boolean columns: the number of true and
+ * the number of false values.
+ */
+public interface BooleanColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the number of false values in the column.
+   * @return the count of false values
+   */
+  long getFalseCount();
+
+  /**
+   * Get the number of true values in the column.
+   * @return the count of true values
+   */
+  long getTrueCount();
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * The statistics that every column provides, whatever its type.
+ */
+public interface ColumnStatistics {
+
+  /**
+   * Get the number of values in this column. Because of NULL values and
+   * repeated values this is not necessarily the same as the number of rows.
+   * @return the number of values
+   */
+  long getNumberOfValues();
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,516 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+/**
+ * Base implementation of ColumnStatistics. It tracks only the number of
+ * values; the nested subclasses add the type specific statistics for
+ * boolean, integer, double and string columns.
+ */
+class ColumnStatisticsImpl implements ColumnStatistics {
+
+  /**
+   * Statistics for boolean columns: counts the true values. The number of
+   * false values is derived from the total count.
+   */
+  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
+      implements BooleanColumnStatistics {
+    private long trueCount = 0;
+
+    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
+      // the first (and only serialized) bucket holds the true count
+      trueCount = bkt.getCount(0);
+    }
+
+    BooleanStatisticsImpl() {
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      trueCount = 0;
+    }
+
+    @Override
+    void updateBoolean(boolean value) {
+      if (value) {
+        trueCount += 1;
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+      trueCount += bkt.trueCount;
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.BucketStatistics.Builder bucket =
+        OrcProto.BucketStatistics.newBuilder();
+      bucket.addCount(trueCount);
+      builder.setBucketStatistics(bucket);
+      return builder;
+    }
+
+    @Override
+    public long getFalseCount() {
+      return getNumberOfValues() - trueCount;
+    }
+
+    @Override
+    public long getTrueCount() {
+      return trueCount;
+    }
+
+    @Override
+    public String toString() {
+      return super.toString() + " true: " + trueCount;
+    }
+  }
+
+  /**
+   * Statistics for the integer types: minimum, maximum and a sum that is
+   * dropped once it overflows a long.
+   */
+  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
+      implements IntegerColumnStatistics {
+
+    private long minimum = Long.MAX_VALUE;
+    private long maximum = Long.MIN_VALUE;
+    private long sum = 0;
+    // true once minimum and maximum hold real values
+    private boolean hasMinimum = false;
+    // true once the sum is no longer meaningful
+    private boolean overflow = false;
+
+    IntegerStatisticsImpl() {
+    }
+
+    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
+      if (intStat.hasMinimum()) {
+        hasMinimum = true;
+        minimum = intStat.getMinimum();
+      }
+      if (intStat.hasMaximum()) {
+        maximum = intStat.getMaximum();
+      }
+      if (intStat.hasSum()) {
+        sum = intStat.getSum();
+      } else {
+        // serialize() leaves the sum out when it overflowed
+        overflow = true;
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Long.MAX_VALUE;
+      maximum = Long.MIN_VALUE;
+      sum = 0;
+      overflow = false;
+    }
+
+    @Override
+    void updateInteger(long value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += value;
+        // an addition can only overflow when both operands have the same
+        // sign, and it shows up as the sign of the sum flipping
+        if ((value >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+      if (!hasMinimum) {
+        hasMinimum = otherInt.hasMinimum;
+        minimum = otherInt.minimum;
+        maximum = otherInt.maximum;
+      } else if (otherInt.hasMinimum) {
+        if (otherInt.minimum < minimum) {
+          minimum = otherInt.minimum;
+        }
+        if (otherInt.maximum > maximum) {
+          maximum = otherInt.maximum;
+        }
+      }
+      super.merge(other);
+      overflow |= otherInt.overflow;
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += otherInt.sum;
+        // same sign-flip overflow check as in updateInteger
+        if ((otherInt.sum >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.IntegerStatistics.Builder intb =
+        OrcProto.IntegerStatistics.newBuilder();
+      if (hasMinimum) {
+        intb.setMinimum(minimum);
+        intb.setMaximum(maximum);
+      }
+      if (!overflow) {
+        intb.setSum(sum);
+      }
+      builder.setIntStatistics(intb);
+      return builder;
+    }
+
+    @Override
+    public long getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public long getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public boolean isSumDefined() {
+      return !overflow;
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      if (!overflow) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  /**
+   * Statistics for float and double columns: minimum, maximum and sum.
+   */
+  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
+      implements DoubleColumnStatistics {
+    // true once minimum and maximum hold real values
+    private boolean hasMinimum = false;
+    private double minimum = Double.MAX_VALUE;
+    // Double.MIN_VALUE is the smallest positive double, not the most
+    // negative one, so the "nothing seen yet" maximum is -Double.MAX_VALUE
+    private double maximum = -Double.MAX_VALUE;
+    private double sum = 0;
+
+    DoubleStatisticsImpl() {
+    }
+
+    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
+      if (dbl.hasMinimum()) {
+        hasMinimum = true;
+        minimum = dbl.getMinimum();
+      }
+      if (dbl.hasMaximum()) {
+        maximum = dbl.getMaximum();
+      }
+      if (dbl.hasSum()) {
+        sum = dbl.getSum();
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Double.MAX_VALUE;
+      maximum = -Double.MAX_VALUE;
+      sum = 0;
+    }
+
+    @Override
+    void updateDouble(double value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      sum += value;
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+      if (!hasMinimum) {
+        hasMinimum = dbl.hasMinimum;
+        minimum = dbl.minimum;
+        maximum = dbl.maximum;
+      } else if (dbl.hasMinimum) {
+        if (dbl.minimum < minimum) {
+          minimum = dbl.minimum;
+        }
+        if (dbl.maximum > maximum) {
+          maximum = dbl.maximum;
+        }
+      }
+      sum += dbl.sum;
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.DoubleStatistics.Builder dbl =
+        OrcProto.DoubleStatistics.newBuilder();
+      if (hasMinimum) {
+        dbl.setMinimum(minimum);
+        dbl.setMaximum(maximum);
+      }
+      dbl.setSum(sum);
+      builder.setDoubleStatistics(dbl);
+      return builder;
+    }
+
+    @Override
+    public double getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public double getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public double getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      buf.append(" sum: ");
+      buf.append(sum);
+      return buf.toString();
+    }
+  }
+
+  /**
+   * Statistics for string columns: the minimum and maximum value according
+   * to String.compareTo. Both are null until a value has been seen.
+   */
+  private static final class StringStatisticsImpl extends ColumnStatisticsImpl
+      implements StringColumnStatistics {
+    private String minimum = null;
+    private String maximum = null;
+
+    StringStatisticsImpl() {
+    }
+
+    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.StringStatistics str = stats.getStringStatistics();
+      if (str.hasMaximum()) {
+        maximum = str.getMaximum();
+      }
+      if (str.hasMinimum()) {
+        minimum = str.getMinimum();
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    void updateString(String value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum.compareTo(value) > 0) {
+        minimum = value;
+      } else if (maximum.compareTo(value) < 0) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      StringStatisticsImpl str = (StringStatisticsImpl) other;
+      if (minimum == null) {
+        minimum = str.minimum;
+        maximum = str.maximum;
+      } else if (str.minimum != null) {
+        // The two bounds are independent: the other statistics may both
+        // lower the minimum and raise the maximum, so each bound is
+        // checked on its own rather than in an if/else chain.
+        if (minimum.compareTo(str.minimum) > 0) {
+          minimum = str.minimum;
+        }
+        if (maximum.compareTo(str.maximum) < 0) {
+          maximum = str.maximum;
+        }
+      }
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.StringStatistics.Builder str =
+        OrcProto.StringStatistics.newBuilder();
+      if (getNumberOfValues() != 0) {
+        str.setMinimum(minimum);
+        str.setMaximum(maximum);
+      }
+      result.setStringStatistics(str);
+      return result;
+    }
+
+    @Override
+    public String getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public String getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      return buf.toString();
+    }
+  }
+
+  // the number of values counted so far
+  private long count = 0;
+
+  /**
+   * Rebuild the statistics from their serialized form.
+   * @param stats the serialized statistics; the count stays 0 when they
+   *   carry no number of values
+   */
+  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    if (stats.hasNumberOfValues()) {
+      count = stats.getNumberOfValues();
+    }
+  }
+
+  ColumnStatisticsImpl() {
+  }
+
+  /**
+   * Count one more value. The update methods do not touch the count, so
+   * this is tracked separately from them.
+   */
+  void increment() {
+    count += 1;
+  }
+
+  // The update methods are overridden by the subclass for the matching
+  // column type; calling one on any other kind of statistics is an error.
+
+  void updateBoolean(boolean value) {
+    throw new UnsupportedOperationException("Can't update boolean");
+  }
+
+  void updateInteger(long value) {
+    throw new UnsupportedOperationException("Can't update integer");
+  }
+
+  void updateDouble(double value) {
+    throw new UnsupportedOperationException("Can't update double");
+  }
+
+  void updateString(String value) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  /**
+   * Fold another set of statistics into this one.
+   * @param stats the statistics to add; the subclasses cast it to their
+   *   own type, so it must be of the same kind as this object
+   */
+  void merge(ColumnStatisticsImpl stats) {
+    count += stats.count;
+  }
+
+  /**
+   * Return the statistics to their initial, empty state.
+   */
+  void reset() {
+    count = 0;
+  }
+
+  @Override
+  public long getNumberOfValues() {
+    return count;
+  }
+
+  @Override
+  public String toString() {
+    return "count: " + count;
+  }
+
+  /**
+   * Start the serialized form of the statistics.
+   * @return a builder with the number of values set; subclasses add their
+   *   own statistics to it
+   */
+  OrcProto.ColumnStatistics.Builder serialize() {
+    OrcProto.ColumnStatistics.Builder builder =
+      OrcProto.ColumnStatistics.newBuilder();
+    builder.setNumberOfValues(count);
+    return builder;
+  }
+
+  /**
+   * Create empty statistics suited to the type described by the inspector.
+   * @param inspector the object inspector of the column
+   * @return type specific statistics for boolean, integer, floating point
+   *   and string primitives; count-only statistics for everything else
+   */
+  static ColumnStatisticsImpl create(ObjectInspector inspector) {
+    switch (inspector.getCategory()) {
+      case PRIMITIVE:
+        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
+          case BOOLEAN:
+            return new BooleanStatisticsImpl();
+          case BYTE:
+          case SHORT:
+          case INT:
+          case LONG:
+            return new IntegerStatisticsImpl();
+          case FLOAT:
+          case DOUBLE:
+            return new DoubleStatisticsImpl();
+          case STRING:
+            return new StringStatisticsImpl();
+          default:
+            return new ColumnStatisticsImpl();
+        }
+      default:
+        return new ColumnStatisticsImpl();
+    }
+  }
+
+  /**
+   * Rebuild statistics from their serialized form. The kind of statistics
+   * is chosen by which type specific section is present.
+   * @param stats the serialized statistics
+   * @return the matching statistics object, or count-only statistics when
+   *   no type specific section is present
+   */
+  static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+    if (stats.hasBucketStatistics()) {
+      return new BooleanStatisticsImpl(stats);
+    } else if (stats.hasIntStatistics()) {
+      return new IntegerStatisticsImpl(stats);
+    } else if (stats.hasDoubleStatistics()) {
+      return new DoubleStatisticsImpl(stats);
+    } else if (stats.hasStringStatistics()) {
+      return new StringStatisticsImpl(stats);
+    } else {
+      return new ColumnStatisticsImpl(stats);
+    }
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Contract for a codec that compresses and decompresses the contents of
+ * ByteBuffers.
+ */
+interface CompressionCodec {
+
+  /**
+   * Compress the in buffer to the out buffer.
+   * @param in the bytes to compress
+   * @param out the compressed bytes
+   * @param overflow put any additional bytes here
+   * @return true if the output is smaller than input
+   * @throws IOException
+   */
+  boolean compress(ByteBuffer in,
+                   ByteBuffer out,
+                   ByteBuffer overflow) throws IOException;
+
+  /**
+   * Decompress the in buffer to the out buffer.
+   * @param in the bytes to decompress
+   * @param out the decompressed bytes
+   * @throws IOException
+   */
+  void decompress(ByteBuffer in,
+                  ByteBuffer out) throws IOException;
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * The generic compression algorithms that can be applied to ORC files.
+ */
+public enum CompressionKind {
+  NONE,
+  ZLIB,
+  SNAPPY,
+  LZO
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Column statistics specific to float and double columns.
+ */
+public interface DoubleColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the smallest value in the column. The result is only meaningful
+   * when getNumberOfValues is non-zero.
+   * @return the minimum
+   */
+  double getMinimum();
+
+  /**
+   * Get the largest value in the column. The result is only meaningful
+   * when getNumberOfValues is non-zero.
+   * @return the maximum
+   */
+  double getMaximum();
+
+  /**
+   * Get the total of all the values in the column.
+   * @return the sum
+   */
+  double getSum();
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+/**
+ * A class that is a growable array of bytes. Growth is managed in terms of
+ * chunks that are allocated when needed, so existing bytes are never
+ * copied when the array grows.
+ */
+final class DynamicByteArray {
+  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
+  static final int DEFAULT_NUM_CHUNKS = 128;
+
+  private final int chunkSize;       // our allocation sizes
+  private byte[][] data;             // the real data
+  private int length;                // max set element index +1
+  private int initializedChunks = 0; // the number of chunks created
+
+  public DynamicByteArray() {
+    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
+  }
+
+  /**
+   * Create an empty array.
+   * @param numChunks the initial number of chunk slots
+   * @param chunkSize the number of bytes in each chunk
+   * @throws IllegalArgumentException if chunkSize is not positive
+   */
+  public DynamicByteArray(int numChunks, int chunkSize) {
+    // a negative size would only fail later, when the first chunk is
+    // allocated, so it is rejected here together with zero
+    if (chunkSize <= 0) {
+      throw new IllegalArgumentException("bad chunksize");
+    }
+    this.chunkSize = chunkSize;
+    data = new byte[numChunks][];
+  }
+
+  /**
+   * Ensure that the given chunk, and every chunk before it, is allocated.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        // enlarge the table of chunks; the chunks themselves are reused
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        byte[][] newChunk = new byte[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for(int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new byte[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  /**
+   * Get the byte at the given index.
+   * @param index the position to read
+   * @return the byte stored there
+   * @throws IndexOutOfBoundsException if index is outside of 0..size()-1
+   */
+  public byte get(int index) {
+    if (index < 0 || index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  /**
+   * Set the byte at the given index, growing the array if needed.
+   * @param index the position to write
+   * @param value the byte to store
+   */
+  public void set(int index, byte value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  /**
+   * Append a single byte.
+   * @param value the byte to append
+   * @return the index at which the byte was stored
+   */
+  public int add(byte value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    int result = length;
+    length += 1;
+    return result;
+  }
+
+  /**
+   * Copy a slice of a byte array into our buffer.
+   * @param value the array to copy from
+   * @param valueOffset the first location to copy from value
+   * @param valueLength the number of bytes to copy from value
+   * @return the offset in this array at which the copied bytes start
+   */
+  public int add(byte[] value, int valueOffset, int valueLength) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow((length + valueLength) / chunkSize);
+    int remaining = valueLength;
+    while (remaining > 0) {
+      // copy up to the end of the current chunk, then move to the next
+      int size = Math.min(remaining, chunkSize - j);
+      System.arraycopy(value, valueOffset, data[i], j, size);
+      remaining -= size;
+      valueOffset += size;
+      i += 1;
+      j = 0;
+    }
+    int result = length;
+    length += valueLength;
+    return result;
+  }
+
+  /**
+   * Read the entire stream into this array.
+   * @param in the stream to read from
+   * @throws IOException
+   */
+  public void readAll(InputStream in) throws IOException {
+    int currentChunk = length / chunkSize;
+    int currentOffset = length % chunkSize;
+    grow(currentChunk);
+    int currentLength = in.read(data[currentChunk], currentOffset,
+      chunkSize - currentOffset);
+    while (currentLength > 0) {
+      length += currentLength;
+      currentOffset = length % chunkSize;
+      if (currentOffset == 0) {
+        // the chunk is full; continue in a fresh one
+        currentChunk = length / chunkSize;
+        grow(currentChunk);
+      }
+      currentLength = in.read(data[currentChunk], currentOffset,
+        chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Byte compare a set of bytes against the bytes in this dynamic array.
+   * @param other source of the other bytes
+   * @param otherOffset start offset in the other array
+   * @param otherLength number of bytes in the other array
+   * @param ourOffset the offset in our array
+   * @param ourLength the number of bytes in our array
+   * @return negative for less, 0 for equal, positive for greater
+   */
+  public int compare(byte[] other, int otherOffset, int otherLength,
+                     int ourOffset, int ourLength) {
+    int currentChunk = ourOffset / chunkSize;
+    int currentOffset = ourOffset % chunkSize;
+    int maxLength = Math.min(otherLength, ourLength);
+    while (maxLength > 0 &&
+      other[otherOffset] == data[currentChunk][currentOffset]) {
+      otherOffset += 1;
+      currentOffset += 1;
+      if (currentOffset == chunkSize) {
+        currentChunk += 1;
+        currentOffset = 0;
+      }
+      maxLength -= 1;
+    }
+    if (maxLength == 0) {
+      // the common prefix is identical, so the lengths decide
+      return otherLength - ourLength;
+    }
+    // the first differing bytes are compared as unsigned values
+    int otherByte = 0xff & other[otherOffset];
+    int ourByte = 0xff & data[currentChunk][currentOffset];
+    return otherByte > ourByte ? 1 : -1;
+  }
+
+  /**
+   * Get the size of the array.
+   * @return the number of bytes in the array
+   */
+  public int size() {
+    return length;
+  }
+
+  /**
+   * Clear the array to its original pristine state.
+   */
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  /**
+   * Set a text value from the bytes in this dynamic array.
+   * @param result the value to set
+   * @param offset the start of the bytes to copy
+   * @param length the number of bytes to copy
+   */
+  public void setText(Text result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.append(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Write out a range of this dynamic array to an output stream.
+   * @param out the stream to write to
+   * @param offset the first offset to write
+   * @param length the number of bytes to write
+   * @throws IOException
+   */
+  public void write(OutputStream out, int offset,
+                    int length) throws IOException {
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    while (length > 0) {
+      int currentLength = Math.min(length, chunkSize - currentOffset);
+      out.write(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+    }
+  }
+
+  /**
+   * Render the contents as a comma separated list of hexadecimal byte
+   * values enclosed in braces. An empty array is rendered as {}.
+   * @return the string form of the array
+   */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder(length * 3 + 2);
+    sb.append('{');
+    for (int i = 0; i < length; i++) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      // mask so a negative byte prints as two hex digits instead of being
+      // sign extended to eight
+      sb.append(Integer.toHexString(0xff & get(i)));
+    }
+    sb.append('}');
+    return sb.toString();
+  }
+
+  /**
+   * Copy a range of this dynamic array into a byte buffer. The buffer is
+   * cleared first and the bytes are put at its start.
+   * @param result the buffer to fill
+   * @param offset the start of the bytes to copy
+   * @param length the number of bytes to copy
+   */
+  public void setByteBuffer(ByteBuffer result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.put(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+}
+
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Dynamic int array that uses primitive types and chunks to avoid copying
+ * large number of integers when it resizes.
+ *
+ * The motivation for this class is memory optimization, i.e. space efficient
+ * storage of potentially huge arrays without good a-priori size guesses.
+ *
+ * The API of this class is between a primitive array and a AbstractList. It's
+ * not a Collection implementation because it handles primitive types, but the
+ * API could be extended to support iterators and the like.
+ *
+ * NOTE: Like standard Collection implementations/arrays, this class is not
+ * synchronized.
+ */
+final class DynamicIntArray {
+ static final int DEFAULT_CHUNKSIZE = 8 * 1024;
+ static final int INIT_CHUNKS = 128;
+
+ private final int chunkSize; // our allocation size
+ private int[][] data; // the real data
+ private int length; // max set element index +1
+ private int initializedChunks = 0; // the number of created chunks
+
+ public DynamicIntArray() {
+ this(DEFAULT_CHUNKSIZE);
+ }
+
+ public DynamicIntArray(int chunkSize) {
+ this.chunkSize = chunkSize;
+
+ data = new int[INIT_CHUNKS][];
+ }
+
+ /**
+ * Ensure that the given index is valid.
+ */
+ private void grow(int chunkIndex) {
+ if (chunkIndex >= initializedChunks) {
+ if (chunkIndex >= data.length) {
+ int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+ int[][] newChunk = new int[newSize][];
+ System.arraycopy(data, 0, newChunk, 0, data.length);
+ data = newChunk;
+ }
+ for (int i=initializedChunks; i <= chunkIndex; ++i) {
+ data[i] = new int[chunkSize];
+ }
+ initializedChunks = chunkIndex + 1;
+ }
+ }
+
+ public int get(int index) {
+ if (index >= length) {
+ throw new IndexOutOfBoundsException("Index " + index +
+ " is outside of 0.." +
+ (length - 1));
+ }
+ int i = index / chunkSize;
+ int j = index % chunkSize;
+ return data[i][j];
+ }
+
+ public void set(int index, int value) {
+ int i = index / chunkSize;
+ int j = index % chunkSize;
+ grow(i);
+ if (index >= length) {
+ length = index + 1;
+ }
+ data[i][j] = value;
+ }
+
+ public void increment(int index, int value) {
+ int i = index / chunkSize;
+ int j = index % chunkSize;
+ grow(i);
+ if (index >= length) {
+ length = index + 1;
+ }
+ data[i][j] += value;
+ }
+
+ public void add(int value) {
+ int i = length / chunkSize;
+ int j = length % chunkSize;
+ grow(i);
+ data[i][j] = value;
+ length += 1;
+ }
+
+ public int size() {
+ return length;
+ }
+
+ public void clear() {
+ length = 0;
+ for(int i=0; i < data.length; ++i) {
+ data[i] = null;
+ }
+ initializedChunks = 0;
+ }
+
+ public String toString() {
+ int i;
+ StringBuilder sb = new StringBuilder(length * 4);
+
+ sb.append('{');
+ int l = length - 1;
+ for (i=0; i<l; i++) {
+ sb.append(get(i));
+ sb.append(',');
+ }
+ sb.append(get(i));
+ sb.append('}');
+
+ return sb.toString();
+ }
+
+}
+
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A tool for printing out the file structure of ORC files.
+ */
+public final class FileDump {
+
+  // not used
+  private FileDump() {}
+
+  /**
+   * Print the structure of every file named on the command line: row
+   * count, compression, type, column statistics, and for each stripe its
+   * streams and column encodings.
+   * @param args the names of the ORC files to dump
+   * @throws Exception if a file cannot be opened or read
+   */
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    for(String filename: args) {
+      System.out.println("Structure for " + filename);
+      Path path = new Path(filename);
+      Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
+      RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
+      // the row reader is only needed for the stripe footers; make sure it
+      // is released for every file, even when dumping fails part-way
+      try {
+        System.out.println("Rows: " + reader.getNumberOfRows());
+        System.out.println("Compression: " + reader.getCompression());
+        if (reader.getCompression() != CompressionKind.NONE) {
+          System.out.println("Compression size: " +
+              reader.getCompressionSize());
+        }
+        System.out.println("Type: " +
+            reader.getObjectInspector().getTypeName());
+        ColumnStatistics[] stats = reader.getStatistics();
+        System.out.println("\nStatistics:");
+        for(int i=0; i < stats.length; ++i) {
+          System.out.println(" Column " + i + ": " + stats[i].toString());
+        }
+        System.out.println("\nStripes:");
+        for(StripeInformation stripe: reader.getStripes()) {
+          long stripeStart = stripe.getOffset();
+          System.out.println(" Stripe: " + stripe.toString());
+          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+          // streams are laid out back to back from the stripe's offset
+          long sectionStart = stripeStart;
+          for(OrcProto.Stream section: footer.getStreamsList()) {
+            System.out.println(" Stream: column " + section.getColumn() +
+                " section " + section.getKind() + " start: " + sectionStart +
+                " length " + section.getLength());
+            sectionStart += section.getLength();
+          }
+          for(int i=0; i < footer.getColumnsCount(); ++i) {
+            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+            StringBuilder buf = new StringBuilder();
+            buf.append(" Encoding column ");
+            buf.append(i);
+            buf.append(": ");
+            buf.append(encoding.getKind());
+            if (encoding.getKind() ==
+                OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+              buf.append("[");
+              buf.append(encoding.getDictionarySize());
+              buf.append("]");
+            }
+            System.out.println(buf);
+          }
+        }
+      } finally {
+        rows.close();
+      }
+    }
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+abstract class InStream extends InputStream {
+
+ private static class UncompressedStream extends InStream {
+ private final String name;
+ private byte[] array;
+ private int offset;
+ private final int base;
+ private final int limit;
+
+ public UncompressedStream(String name, ByteBuffer input) {
+ this.name = name;
+ this.array = input.array();
+ base = input.arrayOffset() + input.position();
+ offset = base;
+ limit = input.arrayOffset() + input.limit();
+ }
+
+ @Override
+ public int read() {
+ if (offset == limit) {
+ return -1;
+ }
+ return 0xff & array[offset++];
+ }
+
+ @Override
+ public int read(byte[] data, int offset, int length) {
+ if (this.offset == limit) {
+ return -1;
+ }
+ int actualLength = Math.min(length, limit - this.offset);
+ System.arraycopy(array, this.offset, data, offset, actualLength);
+ this.offset += actualLength;
+ return actualLength;
+ }
+
+ @Override
+ public int available() {
+ return limit - offset;
+ }
+
+ @Override
+ public void close() {
+ array = null;
+ offset = 0;
+ }
+
+ @Override
+ public void seek(PositionProvider index) throws IOException {
+ offset = base + (int) index.getNext();
+ }
+
+ @Override
+ public String toString() {
+ return "uncompressed stream " + name + " base: " + base +
+ " offset: " + offset + " limit: " + limit;
+ }
+ }
+
+ private static class CompressedStream extends InStream {
+ private final String name;
+ private byte[] array;
+ private final int bufferSize;
+ private ByteBuffer uncompressed = null;
+ private final CompressionCodec codec;
+ private int offset;
+ private final int base;
+ private final int limit;
+ private boolean isUncompressedOriginal;
+
+ public CompressedStream(String name, ByteBuffer input,
+ CompressionCodec codec, int bufferSize
+ ) {
+ this.array = input.array();
+ this.name = name;
+ this.codec = codec;
+ this.bufferSize = bufferSize;
+ base = input.arrayOffset() + input.position();
+ offset = base;
+ limit = input.arrayOffset() + input.limit();
+ }
+
+ private void readHeader() throws IOException {
+ if (limit - offset > OutStream.HEADER_SIZE) {
+ int chunkLength = ((0xff & array[offset + 2]) << 15) |
+ ((0xff & array[offset + 1]) << 7) | ((0xff & array[offset]) >> 1);
+ if (chunkLength > bufferSize) {
+ throw new IllegalArgumentException("Buffer size too small. size = " +
+ bufferSize + " needed = " + chunkLength);
+ }
+ boolean isOriginal = (array[offset] & 0x01) == 1;
+ offset += OutStream.HEADER_SIZE;
+ if (isOriginal) {
+ isUncompressedOriginal = true;
+ uncompressed = ByteBuffer.wrap(array, offset, chunkLength);
+ } else {
+ if (isUncompressedOriginal) {
+ uncompressed = ByteBuffer.allocate(bufferSize);
+ isUncompressedOriginal = false;
+ } else if (uncompressed == null) {
+ uncompressed = ByteBuffer.allocate(bufferSize);
+ } else {
+ uncompressed.clear();
+ }
+ codec.decompress(ByteBuffer.wrap(array, offset, chunkLength),
+ uncompressed);
+ }
+ offset += chunkLength;
+ } else {
+ throw new IllegalStateException("Can't read header");
+ }
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (uncompressed == null || uncompressed.remaining() == 0) {
+ if (offset == limit) {
+ return -1;
+ }
+ readHeader();
+ }
+ return 0xff & uncompressed.get();
+ }
+
+ @Override
+ public int read(byte[] data, int offset, int length) throws IOException {
+ if (uncompressed == null || uncompressed.remaining() == 0) {
+ if (this.offset == this.limit) {
+ return -1;
+ }
+ readHeader();
+ }
+ int actualLength = Math.min(length, uncompressed.remaining());
+ System.arraycopy(uncompressed.array(),
+ uncompressed.arrayOffset() + uncompressed.position(), data,
+ offset, actualLength);
+ uncompressed.position(uncompressed.position() + actualLength);
+ return actualLength;
+ }
+
+ @Override
+ public int available() throws IOException {
+ if (uncompressed == null || uncompressed.remaining() == 0) {
+ if (offset == limit) {
+ return 0;
+ }
+ readHeader();
+ }
+ return uncompressed.remaining();
+ }
+
+ @Override
+ public void close() {
+ array = null;
+ uncompressed = null;
+ offset = 0;
+ }
+
+ @Override
+ public void seek(PositionProvider index) throws IOException {
+ offset = base + (int) index.getNext();
+ int uncompBytes = (int) index.getNext();
+ if (uncompBytes != 0) {
+ readHeader();
+ uncompressed.position(uncompressed.position() + uncompBytes);
+ } else if (uncompressed != null) {
+ uncompressed.position(uncompressed.limit());
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "compressed stream " + name + " base: " + base +
+ " offset: " + offset + " limit: " + limit +
+ (uncompressed == null ? "" :
+ " uncompressed: " + uncompressed.position() + " to " +
+ uncompressed.limit());
+ }
+ }
+
+ public abstract void seek(PositionProvider index) throws IOException;
+
+ public static InStream create(String name,
+ ByteBuffer input,
+ CompressionCodec codec,
+ int bufferSize) throws IOException {
+ if (codec == null) {
+ return new UncompressedStream(name, input);
+ } else {
+ return new CompressedStream(name, input, codec, bufferSize);
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics kept for the integer-typed columns (byte, short, int, and
+ * long), in addition to what ColumnStatistics itself declares.
+ */
+public interface IntegerColumnStatistics extends ColumnStatistics {
+ /**
+ * Get the smallest value in the column. Only defined if getNumberOfValues
+ * is non-zero.
+ * @return the minimum
+ */
+ long getMinimum();
+
+ /**
+ * Get the largest value in the column. Only defined if getNumberOfValues
+ * is non-zero.
+ * @return the maximum
+ */
+ long getMaximum();
+
+ /**
+ * Is the sum defined? If the sum overflowed the counter this will be false.
+ * Check this before relying on getSum.
+ * @return true if the sum is available
+ */
+ boolean isSumDefined();
+
+ /**
+ * Get the sum of the column. Only valid if isSumDefined returns true.
+ * @return the sum of the column
+ */
+ long getSum();
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import java.io.IOException;
+
+/**
+ * Contains factory methods to read or write ORC files.
+ */
+public final class OrcFile {
+
+ public static final String MAGIC = "ORC";
+ // Property names and their defaults. The defaults are strings, not
+ // numbers, because they are used as fallbacks for string properties.
+ public static final String COMPRESSION = "orc.compress";
+ static final String DEFAULT_COMPRESSION = "ZLIB";
+ public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
+ static final String DEFAULT_COMPRESSION_BLOCK_SIZE = "262144";
+ public static final String STRIPE_SIZE = "orc.stripe.size";
+ static final String DEFAULT_STRIPE_SIZE = "268435456";
+ public static final String ROW_INDEX_STRIDE = "orc.row.index.stride";
+ static final String DEFAULT_ROW_INDEX_STRIDE = "10000";
+ public static final String ENABLE_INDEXES = "orc.create.index";
+
+ // unused
+ private OrcFile() {}
+
+ /**
+ * Create an ORC file reader.
+ * @param fs file system
+ * @param path file name to read from
+ * @return a new ORC file reader.
+ * @throws IOException
+ */
+ public static Reader createReader(FileSystem fs, Path path
+ ) throws IOException {
+ return new ReaderImpl(fs, path);
+ }
+
+ /**
+ * Create an ORC file writer.
+ * @param fs file system
+ * @param path filename to write to
+ * @param inspector the ObjectInspector that inspects the rows
+ * @param stripeSize the number of bytes in a stripe
+ * @param compress how to compress the file
+ * @param bufferSize the number of bytes to compress at once
+ * @param rowIndexStride the number of rows between row index entries or
+ * 0 to suppress all indexes
+ * @return a new ORC file writer
+ * @throws IOException
+ */
+ public static Writer createWriter(FileSystem fs,
+ Path path,
+ ObjectInspector inspector,
+ long stripeSize,
+ CompressionKind compress,
+ int bufferSize,
+ int rowIndexStride) throws IOException {
+ return new WriterImpl(fs, path, inspector, stripeSize, compress,
+ bufferSize, rowIndexStride);
+ }
+
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A MapReduce/Hive input format for ORC files.
+ */
+public class OrcInputFormat extends FileInputFormat<NullWritable, OrcStruct>
+ implements InputFormatChecker {
+
+ private static class OrcRecordReader
+ implements RecordReader<NullWritable, OrcStruct> {
+ private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
+ private final long offset;
+ private final long length;
+ private final OrcStruct row;
+ private boolean firstRow = true;
+ private float progress = 0.0f;
+
+ OrcRecordReader(Reader file, Configuration conf,
+ long offset, long length) throws IOException {
+ this.reader = file.rows(offset, length,
+ findIncludedColumns(file.getTypes(), conf));
+ this.offset = offset;
+ this.length = length;
+ if (reader.hasNext()) {
+ row = (OrcStruct) reader.next(null);
+ } else {
+ row = null;
+ }
+ }
+
+ @Override
+ public boolean next(NullWritable key, OrcStruct value) throws IOException {
+ if (firstRow) {
+ firstRow = false;
+ assert value == row: "User didn't pass our value back " + value;
+ return row != null;
+ } else if (reader.hasNext()) {
+ Object obj = reader.next(value);
+ progress = reader.getProgress();
+ assert obj == value : "Reader returned different object " + obj;
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ @Override
+ public OrcStruct createValue() {
+ return row;
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return offset + (long) (progress * length);
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return progress;
+ }
+ }
+
+ public OrcInputFormat() {
+ // just set a really small lower bound
+ setMinSplitSize(16 * 1024);
+ }
+
+ /**
+ * Recurse down into a type subtree turning on all of the sub-columns.
+ * @param types the types of the file
+ * @param result the global view of columns that should be included
+ * @param typeId the root of tree to enable
+ */
+ private static void includeColumnRecursive(List<OrcProto.Type> types,
+ boolean[] result,
+ int typeId) {
+ result[typeId] = true;
+ OrcProto.Type type = types.get(typeId);
+ int children = type.getSubtypesCount();
+ for(int i=0; i < children; ++i) {
+ includeColumnRecursive(types, result, type.getSubtypes(i));
+ }
+ }
+
+ /**
+ * Take the configuration and figure out which columns we need to include.
+ * @param types the types of the file
+ * @param conf the configuration
+ * @return true for each column that should be included
+ */
+ private static boolean[] findIncludedColumns(List<OrcProto.Type> types,
+ Configuration conf) {
+ String includedStr =
+ conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+ if (includedStr == null) {
+ return null;
+ } else {
+ int numColumns = types.size();
+ boolean[] result = new boolean[numColumns];
+ result[0] = true;
+ OrcProto.Type root = types.get(0);
+ List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
+ for(int i=0; i < root.getSubtypesCount(); ++i) {
+ if (included.contains(i)) {
+ includeColumnRecursive(types, result, root.getSubtypes(i));
+ }
+ }
+ // if we are filtering at least one column, return the boolean array
+ for(boolean include: result) {
+ if (!include) {
+ return result;
+ }
+ }
+ return null;
+ }
+ }
+
+ @Override
+ public RecordReader<NullWritable, OrcStruct>
+ getRecordReader(InputSplit inputSplit, JobConf conf,
+ Reporter reporter) throws IOException {
+ FileSplit fileSplit = (FileSplit) inputSplit;
+ Path path = fileSplit.getPath();
+ FileSystem fs = path.getFileSystem(conf);
+ reporter.setStatus(fileSplit.toString());
+ return new OrcRecordReader(OrcFile.createReader(fs, path), conf,
+ fileSplit.getStart(), fileSplit.getLength());
+ }
+
+ @Override
+ public boolean validateInput(FileSystem fs, HiveConf conf,
+ ArrayList<FileStatus> files
+ ) throws IOException {
+ if (files.size() <= 0) {
+ return false;
+ }
+ for (FileStatus file : files) {
+ try {
+ OrcFile.createReader(fs, file.getPath());
+ } catch (IOException e) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * A Hive OutputFormat for ORC files.
+ */
+public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
+    implements HiveOutputFormat<NullWritable, OrcSerdeRow> {
+
+  /**
+   * A record writer that creates the underlying ORC writer lazily, using
+   * the inspector carried by the first row it receives.
+   */
+  private static class OrcRecordWriter
+      implements RecordWriter<NullWritable, OrcSerdeRow>,
+                 FileSinkOperator.RecordWriter {
+    private Writer writer = null;
+    private final FileSystem fs;
+    private final Path path;
+    private final Configuration conf;
+    private final long stripeSize;
+    private final int compressionSize;
+    private final CompressionKind compress;
+    private final int rowIndexStride;
+
+    OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
+                    String stripeSize, String compress,
+                    String compressionSize, String rowIndexStride) {
+      this.fs = fs;
+      this.path = path;
+      this.conf = conf;
+      // the settings arrive as property strings and are parsed once here
+      this.stripeSize = Long.parseLong(stripeSize);
+      this.compress = CompressionKind.valueOf(compress);
+      this.compressionSize = Integer.parseInt(compressionSize);
+      this.rowIndexStride = Integer.parseInt(rowIndexStride);
+    }
+
+    /**
+     * Open the ORC writer for this file with the configured settings.
+     */
+    private Writer open(ObjectInspector inspector) throws IOException {
+      return OrcFile.createWriter(fs, path, inspector, stripeSize,
+          compress, compressionSize, rowIndexStride);
+    }
+
+    @Override
+    public void write(NullWritable nullWritable,
+                      OrcSerdeRow row) throws IOException {
+      if (writer == null) {
+        writer = open(row.getInspector());
+      }
+      writer.addRow(row.getRow());
+    }
+
+    @Override
+    public void write(Writable row) throws IOException {
+      OrcSerdeRow serdeRow = (OrcSerdeRow) row;
+      if (writer == null) {
+        writer = open(serdeRow.getInspector());
+      }
+      writer.addRow(serdeRow.getRow());
+    }
+
+    @Override
+    public void close(Reporter reporter) throws IOException {
+      close(true);
+    }
+
+    @Override
+    public void close(boolean b) throws IOException {
+      if (writer == null) {
+        // no row was ever written: still produce a file, using a struct
+        // schema that has no columns
+        ObjectInspector emptyStruct =
+            ObjectInspectorFactory.getStandardStructObjectInspector(
+                new ArrayList<String>(), new ArrayList<ObjectInspector>());
+        writer = open(emptyStruct);
+      }
+      writer.close();
+    }
+  }
+
+  /**
+   * Create a record writer that uses the default ORC settings.
+   */
+  @Override
+  public RecordWriter<NullWritable, OrcSerdeRow>
+      getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
+                      Progressable reporter) throws IOException {
+    Path target = new Path(name);
+    return new OrcRecordWriter(fileSystem, target, conf,
+        OrcFile.DEFAULT_STRIPE_SIZE, OrcFile.DEFAULT_COMPRESSION,
+        OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE,
+        OrcFile.DEFAULT_ROW_INDEX_STRIDE);
+  }
+
+  /**
+   * Create a record writer whose settings come from the table properties,
+   * falling back to the ORC defaults for properties that are not set.
+   */
+  @Override
+  public FileSinkOperator.RecordWriter
+      getHiveRecordWriter(JobConf conf,
+                          Path path,
+                          Class<? extends Writable> valueClass,
+                          boolean isCompressed,
+                          Properties tableProperties,
+                          Progressable reporter) throws IOException {
+    String stripeSize = tableProperties.getProperty(
+        OrcFile.STRIPE_SIZE, OrcFile.DEFAULT_STRIPE_SIZE);
+    String compression = tableProperties.getProperty(
+        OrcFile.COMPRESSION, OrcFile.DEFAULT_COMPRESSION);
+    String compressionSize = tableProperties.getProperty(
+        OrcFile.COMPRESSION_BLOCK_SIZE,
+        OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE);
+    String rowIndexStride = tableProperties.getProperty(
+        OrcFile.ROW_INDEX_STRIDE, OrcFile.DEFAULT_ROW_INDEX_STRIDE);
+    // explicitly disabled indexes override any configured stride
+    boolean indexesDisabled =
+        "false".equals(tableProperties.getProperty(OrcFile.ENABLE_INDEXES));
+    if (indexesDisabled) {
+      rowIndexStride = "0";
+    }
+    return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
+        stripeSize, compression, compressionSize, rowIndexStride);
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * A serde class for ORC.
+ * It transparently passes the object to/from the ORC file reader/writer.
+ */
+public class OrcSerde implements SerDe {
+ private final OrcSerdeRow row = new OrcSerdeRow();
+ private ObjectInspector inspector = null;
+
+ final class OrcSerdeRow implements Writable {
+ private Object realRow;
+ private ObjectInspector inspector;
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ throw new UnsupportedOperationException("can't write the bundle");
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ throw new UnsupportedOperationException("can't read the bundle");
+ }
+
+ ObjectInspector getInspector() {
+ return inspector;
+ }
+
+ Object getRow() {
+ return realRow;
+ }
+ }
+
+ @Override
+ public void initialize(Configuration conf, Properties table) {
+ // Read the configuration parameters
+ String columnNameProperty = table.getProperty("columns");
+ // NOTE: if "columns.types" is missing, all columns will be of String type
+ String columnTypeProperty = table.getProperty("columns.types");
+
+ // Parse the configuration parameters
+ ArrayList<String> columnNames = new ArrayList<String>();
+ if (columnNameProperty != null && columnNameProperty.length() > 0) {
+ for(String name: columnNameProperty.split(",")) {
+ columnNames.add(name);
+ }
+ }
+ if (columnTypeProperty == null) {
+ // Default type: all string
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < columnNames.size(); i++) {
+ if (i > 0) {
+ sb.append(":");
+ }
+ sb.append("string");
+ }
+ columnTypeProperty = sb.toString();
+ }
+
+ ArrayList<TypeInfo> fieldTypes =
+ TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ StructTypeInfo rootType = new StructTypeInfo();
+ rootType.setAllStructFieldNames(columnNames);
+ rootType.setAllStructFieldTypeInfos(fieldTypes);
+ inspector = OrcStruct.createObjectInspector(rootType);
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return OrcSerdeRow.class;
+ }
+
+ @Override
+ public Writable serialize(Object realRow, ObjectInspector inspector) {
+ row.realRow = realRow;
+ row.inspector = inspector;
+ return row;
+ }
+
+ @Override
+ public Object deserialize(Writable writable) throws SerDeException {
+ return writable;
+ }
+
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return inspector;
+ }
+
+ /**
+ * Always returns null, since serialized size doesn't make sense in the
+ * context of ORC files.
+ * @return null
+ */
+ @Override
+ public SerDeStats getSerDeStats() {
+ return null;
+ }
+}