Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:56 UTC
[33/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StripeInformation.java b/orc/src/java/org/apache/hive/orc/StripeInformation.java
new file mode 100644
index 0000000..b8dfc60
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StripeInformation.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Information about the stripes in an ORC file that is provided by the Reader.
+ */
+public interface StripeInformation {
+ /**
+ * Get the byte offset of the start of the stripe.
+ * @return the bytes from the start of the file
+ */
+ long getOffset();
+
+ /**
+ * Get the total length of the stripe in bytes.
+ * @return the number of bytes in the stripe
+ */
+ long getLength();
+
+ /**
+ * Get the length of the stripe's indexes.
+ * @return the number of bytes in the index
+ */
+ long getIndexLength();
+
+ /**
+ * Get the length of the stripe's data.
+ * @return the number of bytes in the stripe
+ */
+ long getDataLength();
+
+ /**
+ * Get the length of the stripe's tail section, which contains its index.
+ * @return the number of bytes in the tail
+ */
+ long getFooterLength();
+
+ /**
+ * Get the number of rows in the stripe.
+ * @return a count of the number of rows
+ */
+ long getNumberOfRows();
+}
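
For orientation, a minimal sketch of how these getters are typically consumed. It assumes the companion Reader interface in this package exposes getStripes(), as the upstream ORC reader does; that interface is not part of this hunk.

    import java.util.List;
    import org.apache.hive.orc.Reader;
    import org.apache.hive.orc.StripeInformation;

    public class StripeDump {
      // Print the layout of every stripe: offset, section lengths, and row count.
      static void dumpStripes(Reader reader) {
        List<StripeInformation> stripes = reader.getStripes();
        for (StripeInformation stripe : stripes) {
          System.out.println("offset=" + stripe.getOffset()
              + " length=" + stripe.getLength()
              + " index=" + stripe.getIndexLength()
              + " data=" + stripe.getDataLength()
              + " footer=" + stripe.getFooterLength()
              + " rows=" + stripe.getNumberOfRows());
        }
      }
    }
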
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StripeStatistics.java b/orc/src/java/org/apache/hive/orc/StripeStatistics.java
new file mode 100644
index 0000000..c704dd9
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StripeStatistics.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hive.orc.impl.ColumnStatisticsImpl;
+
+import java.util.List;
+
+public class StripeStatistics {
+ private final List<OrcProto.ColumnStatistics> cs;
+
+ public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
+ this.cs = list;
+ }
+
+ /**
+ * Return the list of column statistics.
+ *
+ * @return column stats
+ */
+ public ColumnStatistics[] getColumnStatistics() {
+ ColumnStatistics[] result = new ColumnStatistics[cs.size()];
+ for (int i = 0; i < result.length; ++i) {
+ result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
+ }
+ return result;
+ }
+}
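
A short usage sketch of this wrapper. The list of protobuf ColumnStatistics would normally come from the file metadata, which is outside this hunk, and getNumberOfValues() is assumed from the ColumnStatistics interface as in upstream ORC.

    import java.util.List;
    import org.apache.hive.orc.ColumnStatistics;
    import org.apache.hive.orc.OrcProto;
    import org.apache.hive.orc.StripeStatistics;

    public class StripeStatsExample {
      // Wrap the per-stripe protobuf statistics and read back typed column stats.
      static void printValueCounts(List<OrcProto.ColumnStatistics> protoStats) {
        StripeStatistics stats = new StripeStatistics(protoStats);
        ColumnStatistics[] columns = stats.getColumnStatistics();
        for (int col = 0; col < columns.length; ++col) {
          System.out.println("column " + col + ": "
              + columns[col].getNumberOfValues() + " values");
        }
      }
    }
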
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
new file mode 100644
index 0000000..55a7e42
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.sql.Timestamp;
+
+/**
+ * Statistics for Timestamp columns.
+ */
+public interface TimestampColumnStatistics extends ColumnStatistics {
+ /**
+ * Get the minimum value for the column.
+ * @return minimum value
+ */
+ Timestamp getMinimum();
+
+ /**
+ * Get the maximum value for the column.
+ * @return maximum value
+ */
+ Timestamp getMaximum();
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/TypeDescription.java b/orc/src/java/org/apache/hive/orc/TypeDescription.java
new file mode 100644
index 0000000..f4ed908
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/TypeDescription.java
@@ -0,0 +1,870 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This is the description of the types in an ORC file.
+ */
+public class TypeDescription
+ implements Comparable<TypeDescription>, Serializable {
+ private static final int MAX_PRECISION = 38;
+ private static final int MAX_SCALE = 38;
+ private static final int DEFAULT_PRECISION = 38;
+ private static final int DEFAULT_SCALE = 10;
+ private static final int DEFAULT_LENGTH = 256;
+
+ @Override
+ public int compareTo(TypeDescription other) {
+ if (this == other) {
+ return 0;
+ } else if (other == null) {
+ return -1;
+ } else {
+ int result = category.compareTo(other.category);
+ if (result == 0) {
+ switch (category) {
+ case CHAR:
+ case VARCHAR:
+ return maxLength - other.maxLength;
+ case DECIMAL:
+ if (precision != other.precision) {
+ return precision - other.precision;
+ }
+ return scale - other.scale;
+ case UNION:
+ case LIST:
+ case MAP:
+ if (children.size() != other.children.size()) {
+ return children.size() - other.children.size();
+ }
+ for(int c=0; result == 0 && c < children.size(); ++c) {
+ result = children.get(c).compareTo(other.children.get(c));
+ }
+ break;
+ case STRUCT:
+ if (children.size() != other.children.size()) {
+ return children.size() - other.children.size();
+ }
+ for(int c=0; result == 0 && c < children.size(); ++c) {
+ result = fieldNames.get(c).compareTo(other.fieldNames.get(c));
+ if (result == 0) {
+ result = children.get(c).compareTo(other.children.get(c));
+ }
+ }
+ break;
+ default:
+ // PASS
+ }
+ }
+ return result;
+ }
+ }
+
+ public enum Category {
+ BOOLEAN("boolean", true),
+ BYTE("tinyint", true),
+ SHORT("smallint", true),
+ INT("int", true),
+ LONG("bigint", true),
+ FLOAT("float", true),
+ DOUBLE("double", true),
+ STRING("string", true),
+ DATE("date", true),
+ TIMESTAMP("timestamp", true),
+ BINARY("binary", true),
+ DECIMAL("decimal", true),
+ VARCHAR("varchar", true),
+ CHAR("char", true),
+ LIST("array", false),
+ MAP("map", false),
+ STRUCT("struct", false),
+ UNION("uniontype", false);
+
+ Category(String name, boolean isPrimitive) {
+ this.name = name;
+ this.isPrimitive = isPrimitive;
+ }
+
+ final boolean isPrimitive;
+ final String name;
+
+ public boolean isPrimitive() {
+ return isPrimitive;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ public static TypeDescription createBoolean() {
+ return new TypeDescription(Category.BOOLEAN);
+ }
+
+ public static TypeDescription createByte() {
+ return new TypeDescription(Category.BYTE);
+ }
+
+ public static TypeDescription createShort() {
+ return new TypeDescription(Category.SHORT);
+ }
+
+ public static TypeDescription createInt() {
+ return new TypeDescription(Category.INT);
+ }
+
+ public static TypeDescription createLong() {
+ return new TypeDescription(Category.LONG);
+ }
+
+ public static TypeDescription createFloat() {
+ return new TypeDescription(Category.FLOAT);
+ }
+
+ public static TypeDescription createDouble() {
+ return new TypeDescription(Category.DOUBLE);
+ }
+
+ public static TypeDescription createString() {
+ return new TypeDescription(Category.STRING);
+ }
+
+ public static TypeDescription createDate() {
+ return new TypeDescription(Category.DATE);
+ }
+
+ public static TypeDescription createTimestamp() {
+ return new TypeDescription(Category.TIMESTAMP);
+ }
+
+ public static TypeDescription createBinary() {
+ return new TypeDescription(Category.BINARY);
+ }
+
+ public static TypeDescription createDecimal() {
+ return new TypeDescription(Category.DECIMAL);
+ }
+
+ static class StringPosition {
+ final String value;
+ int position;
+ final int length;
+
+ StringPosition(String value) {
+ this.value = value;
+ position = 0;
+ length = value.length();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append('\'');
+ buffer.append(value.substring(0, position));
+ buffer.append('^');
+ buffer.append(value.substring(position));
+ buffer.append('\'');
+ return buffer.toString();
+ }
+ }
+
+ static Category parseCategory(StringPosition source) {
+ int start = source.position;
+ while (source.position < source.length) {
+ char ch = source.value.charAt(source.position);
+ if (!Character.isLetter(ch)) {
+ break;
+ }
+ source.position += 1;
+ }
+ if (source.position != start) {
+ String word = source.value.substring(start, source.position).toLowerCase();
+ for (Category cat : Category.values()) {
+ if (cat.getName().equals(word)) {
+ return cat;
+ }
+ }
+ }
+ throw new IllegalArgumentException("Can't parse category at " + source);
+ }
+
+ static int parseInt(StringPosition source) {
+ int start = source.position;
+ int result = 0;
+ while (source.position < source.length) {
+ char ch = source.value.charAt(source.position);
+ if (!Character.isDigit(ch)) {
+ break;
+ }
+ result = result * 10 + (ch - '0');
+ source.position += 1;
+ }
+ if (source.position == start) {
+ throw new IllegalArgumentException("Missing integer at " + source);
+ }
+ return result;
+ }
+
+ static String parseName(StringPosition source) {
+ int start = source.position;
+ while (source.position < source.length) {
+ char ch = source.value.charAt(source.position);
+ if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
+ break;
+ }
+ source.position += 1;
+ }
+ if (source.position == start) {
+ throw new IllegalArgumentException("Missing name at " + source);
+ }
+ return source.value.substring(start, source.position);
+ }
+
+ static void requireChar(StringPosition source, char required) {
+ if (source.position >= source.length ||
+ source.value.charAt(source.position) != required) {
+ throw new IllegalArgumentException("Missing required char '" +
+ required + "' at " + source);
+ }
+ source.position += 1;
+ }
+
+ static boolean consumeChar(StringPosition source, char ch) {
+ boolean result = source.position < source.length &&
+ source.value.charAt(source.position) == ch;
+ if (result) {
+ source.position += 1;
+ }
+ return result;
+ }
+
+ static void parseUnion(TypeDescription type, StringPosition source) {
+ requireChar(source, '<');
+ do {
+ type.addUnionChild(parseType(source));
+ } while (consumeChar(source, ','));
+ requireChar(source, '>');
+ }
+
+ static void parseStruct(TypeDescription type, StringPosition source) {
+ requireChar(source, '<');
+ do {
+ String fieldName = parseName(source);
+ requireChar(source, ':');
+ type.addField(fieldName, parseType(source));
+ } while (consumeChar(source, ','));
+ requireChar(source, '>');
+ }
+
+ static TypeDescription parseType(StringPosition source) {
+ TypeDescription result = new TypeDescription(parseCategory(source));
+ switch (result.getCategory()) {
+ case BINARY:
+ case BOOLEAN:
+ case BYTE:
+ case DATE:
+ case DOUBLE:
+ case FLOAT:
+ case INT:
+ case LONG:
+ case SHORT:
+ case STRING:
+ case TIMESTAMP:
+ break;
+ case CHAR:
+ case VARCHAR:
+ requireChar(source, '(');
+ result.withMaxLength(parseInt(source));
+ requireChar(source, ')');
+ break;
+ case DECIMAL: {
+ requireChar(source, '(');
+ int precision = parseInt(source);
+ requireChar(source, ',');
+ result.withScale(parseInt(source));
+ result.withPrecision(precision);
+ requireChar(source, ')');
+ break;
+ }
+ case LIST:
+ requireChar(source, '<');
+ result.children.add(parseType(source));
+ requireChar(source, '>');
+ break;
+ case MAP:
+ requireChar(source, '<');
+ result.children.add(parseType(source));
+ requireChar(source, ',');
+ result.children.add(parseType(source));
+ requireChar(source, '>');
+ break;
+ case UNION:
+ parseUnion(result, source);
+ break;
+ case STRUCT:
+ parseStruct(result, source);
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown type " +
+ result.getCategory() + " at " + source);
+ }
+ return result;
+ }
+
+ /**
+ * Parse a TypeDescription from a Hive type name. This is the inverse
+ * of TypeDescription.toString().
+ * @param typeName the name of the type
+ * @return a new TypeDescription or null if typeName was null
+ * @throws IllegalArgumentException if the string is badly formed
+ */
+ public static TypeDescription fromString(String typeName) {
+ if (typeName == null) {
+ return null;
+ }
+ StringPosition source = new StringPosition(typeName);
+ TypeDescription result = parseType(source);
+ if (source.position != source.length) {
+ throw new IllegalArgumentException("Extra characters at " + source);
+ }
+ return result;
+ }
+
+ /**
+ * For decimal types, set the precision.
+ * @param precision the new precision
+ * @return this
+ */
+ public TypeDescription withPrecision(int precision) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("precision is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
+ throw new IllegalArgumentException("precision " + precision +
+ " is out of range 1 .. " + scale);
+ }
+ this.precision = precision;
+ return this;
+ }
+
+ /**
+ * For decimal types, set the scale.
+ * @param scale the new scale
+ * @return this
+ */
+ public TypeDescription withScale(int scale) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("scale is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
+ throw new IllegalArgumentException("scale is out of range at " + scale);
+ }
+ this.scale = scale;
+ return this;
+ }
+
+ public static TypeDescription createVarchar() {
+ return new TypeDescription(Category.VARCHAR);
+ }
+
+ public static TypeDescription createChar() {
+ return new TypeDescription(Category.CHAR);
+ }
+
+ /**
+ * Set the maximum length for char and varchar types.
+ * @param maxLength the maximum value
+ * @return this
+ */
+ public TypeDescription withMaxLength(int maxLength) {
+ if (category != Category.VARCHAR && category != Category.CHAR) {
+ throw new IllegalArgumentException("maxLength is only allowed on char" +
+ " and varchar and not " + category.name);
+ }
+ this.maxLength = maxLength;
+ return this;
+ }
+
+ public static TypeDescription createList(TypeDescription childType) {
+ TypeDescription result = new TypeDescription(Category.LIST);
+ result.children.add(childType);
+ childType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createMap(TypeDescription keyType,
+ TypeDescription valueType) {
+ TypeDescription result = new TypeDescription(Category.MAP);
+ result.children.add(keyType);
+ result.children.add(valueType);
+ keyType.parent = result;
+ valueType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createUnion() {
+ return new TypeDescription(Category.UNION);
+ }
+
+ public static TypeDescription createStruct() {
+ return new TypeDescription(Category.STRUCT);
+ }
+
+ /**
+ * Add a child to a union type.
+ * @param child a new child type to add
+ * @return the union type.
+ */
+ public TypeDescription addUnionChild(TypeDescription child) {
+ if (category != Category.UNION) {
+ throw new IllegalArgumentException("Can only add types to union type" +
+ " and not " + category);
+ }
+ children.add(child);
+ child.parent = this;
+ return this;
+ }
+
+ /**
+ * Add a field to a struct type as it is built.
+ * @param field the field name
+ * @param fieldType the type of the field
+ * @return the struct type
+ */
+ public TypeDescription addField(String field, TypeDescription fieldType) {
+ if (category != Category.STRUCT) {
+ throw new IllegalArgumentException("Can only add fields to struct type" +
+ " and not " + category);
+ }
+ fieldNames.add(field);
+ children.add(fieldType);
+ fieldType.parent = this;
+ return this;
+ }
+
+ /**
+ * Get the id for this type.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the sequential id
+ */
+ public int getId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (id == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return id;
+ }
+
+ public TypeDescription clone() {
+ TypeDescription result = new TypeDescription(category);
+ result.maxLength = maxLength;
+ result.precision = precision;
+ result.scale = scale;
+ if (fieldNames != null) {
+ result.fieldNames.addAll(fieldNames);
+ }
+ if (children != null) {
+ for(TypeDescription child: children) {
+ TypeDescription clone = child.clone();
+ clone.parent = result;
+ result.children.add(clone);
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public int hashCode() {
+ long result = category.ordinal() * 4241 + maxLength + precision * 13 + scale;
+ if (children != null) {
+ for(TypeDescription child: children) {
+ result = result * 6959 + child.hashCode();
+ }
+ }
+ return (int) result;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !(other instanceof TypeDescription)) {
+ return false;
+ }
+ if (other == this) {
+ return true;
+ }
+ TypeDescription castOther = (TypeDescription) other;
+ if (category != castOther.category ||
+ maxLength != castOther.maxLength ||
+ scale != castOther.scale ||
+ precision != castOther.precision) {
+ return false;
+ }
+ if (children != null) {
+ if (children.size() != castOther.children.size()) {
+ return false;
+ }
+ for (int i = 0; i < children.size(); ++i) {
+ if (!children.get(i).equals(castOther.children.get(i))) {
+ return false;
+ }
+ }
+ }
+ if (category == Category.STRUCT) {
+ for(int i=0; i < fieldNames.size(); ++i) {
+ if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Get the maximum id assigned to this type or its children.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the maximum id assigned under this type
+ */
+ public int getMaximumId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (maxId == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return maxId;
+ }
+
+ private ColumnVector createColumn(int maxSize) {
+ switch (category) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case DATE:
+ return new LongColumnVector(maxSize);
+ case TIMESTAMP:
+ return new TimestampColumnVector(maxSize);
+ case FLOAT:
+ case DOUBLE:
+ return new DoubleColumnVector(maxSize);
+ case DECIMAL:
+ return new DecimalColumnVector(maxSize, precision, scale);
+ case STRING:
+ case BINARY:
+ case CHAR:
+ case VARCHAR:
+ return new BytesColumnVector(maxSize);
+ case STRUCT: {
+ ColumnVector[] fieldVector = new ColumnVector[children.size()];
+ for(int i=0; i < fieldVector.length; ++i) {
+ fieldVector[i] = children.get(i).createColumn(maxSize);
+ }
+ return new StructColumnVector(maxSize,
+ fieldVector);
+ }
+ case UNION: {
+ ColumnVector[] fieldVector = new ColumnVector[children.size()];
+ for(int i=0; i < fieldVector.length; ++i) {
+ fieldVector[i] = children.get(i).createColumn(maxSize);
+ }
+ return new UnionColumnVector(maxSize,
+ fieldVector);
+ }
+ case LIST:
+ return new ListColumnVector(maxSize,
+ children.get(0).createColumn(maxSize));
+ case MAP:
+ return new MapColumnVector(maxSize,
+ children.get(0).createColumn(maxSize),
+ children.get(1).createColumn(maxSize));
+ default:
+ throw new IllegalArgumentException("Unknown type " + category);
+ }
+ }
+
+ public VectorizedRowBatch createRowBatch(int maxSize) {
+ VectorizedRowBatch result;
+ if (category == Category.STRUCT) {
+ result = new VectorizedRowBatch(children.size(), maxSize);
+ for(int i=0; i < result.cols.length; ++i) {
+ result.cols[i] = children.get(i).createColumn(maxSize);
+ }
+ } else {
+ result = new VectorizedRowBatch(1, maxSize);
+ result.cols[0] = createColumn(maxSize);
+ }
+ result.reset();
+ return result;
+ }
+
+ public VectorizedRowBatch createRowBatch() {
+ return createRowBatch(VectorizedRowBatch.DEFAULT_SIZE);
+ }
+
+ /**
+ * Get the kind of this type.
+ * @return get the category for this type.
+ */
+ public Category getCategory() {
+ return category;
+ }
+
+ /**
+ * Get the maximum length of the type. Only used for char and varchar types.
+ * @return the maximum length of the string type
+ */
+ public int getMaxLength() {
+ return maxLength;
+ }
+
+ /**
+ * Get the precision of the decimal type.
+ * @return the number of digits for the precision.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Get the scale of the decimal type.
+ * @return the number of digits for the scale.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+ /**
+ * For struct types, get the list of field names.
+ * @return the list of field names.
+ */
+ public List<String> getFieldNames() {
+ return Collections.unmodifiableList(fieldNames);
+ }
+
+ /**
+ * Get the subtypes of this type.
+ * @return the list of children types
+ */
+ public List<TypeDescription> getChildren() {
+ return children == null ? null : Collections.unmodifiableList(children);
+ }
+
+ /**
+ * Assign ids to all of the nodes under this one.
+ * @param startId the lowest id to assign
+ * @return the next available id
+ */
+ private int assignIds(int startId) {
+ id = startId++;
+ if (children != null) {
+ for (TypeDescription child : children) {
+ startId = child.assignIds(startId);
+ }
+ }
+ maxId = startId - 1;
+ return startId;
+ }
+
+ private TypeDescription(Category category) {
+ this.category = category;
+ if (category.isPrimitive) {
+ children = null;
+ } else {
+ children = new ArrayList<>();
+ }
+ if (category == Category.STRUCT) {
+ fieldNames = new ArrayList<>();
+ } else {
+ fieldNames = null;
+ }
+ }
+
+ private int id = -1;
+ private int maxId = -1;
+ private TypeDescription parent;
+ private final Category category;
+ private final List<TypeDescription> children;
+ private final List<String> fieldNames;
+ private int maxLength = DEFAULT_LENGTH;
+ private int precision = DEFAULT_PRECISION;
+ private int scale = DEFAULT_SCALE;
+
+ public void printToBuffer(StringBuilder buffer) {
+ buffer.append(category.name);
+ switch (category) {
+ case DECIMAL:
+ buffer.append('(');
+ buffer.append(precision);
+ buffer.append(',');
+ buffer.append(scale);
+ buffer.append(')');
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append('(');
+ buffer.append(maxLength);
+ buffer.append(')');
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ case STRUCT:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ buffer.append(fieldNames.get(i));
+ buffer.append(':');
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ default:
+ break;
+ }
+ }
+
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ printToBuffer(buffer);
+ return buffer.toString();
+ }
+
+ private void printJsonToBuffer(String prefix, StringBuilder buffer,
+ int indent) {
+ for(int i=0; i < indent; ++i) {
+ buffer.append(' ');
+ }
+ buffer.append(prefix);
+ buffer.append("{\"category\": \"");
+ buffer.append(category.name);
+ buffer.append("\", \"id\": ");
+ buffer.append(getId());
+ buffer.append(", \"max\": ");
+ buffer.append(maxId);
+ switch (category) {
+ case DECIMAL:
+ buffer.append(", \"precision\": ");
+ buffer.append(precision);
+ buffer.append(", \"scale\": ");
+ buffer.append(scale);
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append(", \"length\": ");
+ buffer.append(maxLength);
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append(", \"children\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("", buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append("]");
+ break;
+ case STRUCT:
+ buffer.append(", \"fields\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
+ buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append(']');
+ break;
+ default:
+ break;
+ }
+ buffer.append('}');
+ }
+
+ public String toJson() {
+ StringBuilder buffer = new StringBuilder();
+ printJsonToBuffer("", buffer, 0);
+ return buffer.toString();
+ }
+
+ /**
+ * Locate a subtype by its id.
+ * @param goal the column id to look for
+ * @return the subtype
+ */
+ public TypeDescription findSubtype(int goal) {
+ // call getId method to make sure the ids are assigned
+ int id = getId();
+ if (goal < id || goal > maxId) {
+ throw new IllegalArgumentException("Unknown type id " + id + " in " +
+ toJson());
+ }
+ if (goal == id) {
+ return this;
+ } else {
+ TypeDescription prev = null;
+ for(TypeDescription next: children) {
+ if (next.id > goal) {
+ return prev.findSubtype(goal);
+ }
+ prev = next;
+ }
+ return prev.findSubtype(goal);
+ }
+ }
+}
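
A self-contained sketch of the builder/parser API defined above; it only exercises TypeDescription itself, not file I/O.

    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hive.orc.TypeDescription;

    public class SchemaExample {
      public static void main(String[] args) {
        // Build a schema programmatically ...
        TypeDescription schema = TypeDescription.createStruct()
            .addField("id", TypeDescription.createLong())
            .addField("name", TypeDescription.createVarchar().withMaxLength(64))
            .addField("price",
                TypeDescription.createDecimal().withPrecision(10).withScale(2));

        // ... or parse the equivalent Hive type string; the two forms round-trip.
        TypeDescription parsed = TypeDescription.fromString(
            "struct<id:bigint,name:varchar(64),price:decimal(10,2)>");
        System.out.println(schema.equals(parsed));  // true
        System.out.println(schema);                 // struct<id:bigint,name:varchar(64),price:decimal(10,2)>

        // Column ids are assigned lazily across the whole tree on first use.
        System.out.println(schema.getId() + ".." + schema.getMaximumId());  // 0..3

        // Allocate a vectorized batch whose columns match the schema.
        VectorizedRowBatch batch = schema.createRowBatch();
        System.out.println(batch.numCols);          // 3
      }
    }
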
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/Writer.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/Writer.java b/orc/src/java/org/apache/hive/orc/Writer.java
new file mode 100644
index 0000000..c4c2147
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/Writer.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+/**
+ * The interface for writing ORC files.
+ */
+public interface Writer {
+
+ /**
+ * Get the schema for this writer
+ * @return the file schema
+ */
+ TypeDescription getSchema();
+
+ /**
+ * Add arbitrary meta-data to the ORC file. This may be called at any point
+ * until the Writer is closed. If the same key is passed a second time, the
+ * second value will replace the first.
+ * @param key a key to label the data with.
+ * @param value the contents of the metadata.
+ */
+ void addUserMetadata(String key, ByteBuffer value);
+
+ /**
+ * Add a row batch to the ORC file.
+ * @param batch the rows to add
+ */
+ void addRowBatch(VectorizedRowBatch batch) throws IOException;
+
+ /**
+ * Flush all of the buffers and close the file. No methods on this writer
+ * should be called afterwards.
+ * @throws IOException
+ */
+ void close() throws IOException;
+
+ /**
+ * Return the deserialized data size. Raw data size will be computed when
+ * writing the file footer. Hence the raw data size value will be available only
+ * after closing the writer.
+ *
+ * @return raw data size
+ */
+ long getRawDataSize();
+
+ /**
+ * Return the number of rows in the file. The row count gets updated when flushing
+ * the stripes. To get an accurate row count, this method should be called after
+ * closing the writer.
+ *
+ * @return row count
+ */
+ long getNumberOfRows();
+
+ /**
+ * Write an intermediate footer on the file such that if the file is
+ * truncated to the returned offset, it would be a valid ORC file.
+ * @return the offset that would be a valid end location for an ORC file
+ */
+ long writeIntermediateFooter() throws IOException;
+
+ /**
+ * Fast stripe append to an ORC file. This interface is used for fast merging of
+ * ORC files. When merging, the file to be merged should pass the stripe in binary
+ * form along with the stripe information and stripe statistics. After appending
+ * the last stripe of a file, use appendUserMetadata() to append any user metadata.
+ * @param stripe - stripe as byte array
+ * @param offset - offset within byte array
+ * @param length - length of stripe within byte array
+ * @param stripeInfo - stripe information
+ * @param stripeStatistics - stripe statistics (Protobuf objects can be
+ * merged directly)
+ * @throws IOException
+ */
+ public void appendStripe(byte[] stripe, int offset, int length,
+ StripeInformation stripeInfo,
+ OrcProto.StripeStatistics stripeStatistics) throws IOException;
+
+ /**
+ * When fast stripe append is used for merging ORC stripes, after appending
+ * the last stripe from a file, this interface must be used to merge any
+ * user metadata.
+ * @param userMetadata - user metadata
+ */
+ public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
+}
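
A minimal write-path sketch. It assumes the companion OrcFile factory in this package mirrors upstream ORC's OrcFile.createWriter(path, writerOptions(conf).setSchema(schema)); OrcFile is not part of this hunk.

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hive.orc.OrcFile;
    import org.apache.hive.orc.TypeDescription;
    import org.apache.hive.orc.Writer;

    public class WriterExample {
      public static void main(String[] args) throws Exception {
        TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
        Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
            OrcFile.writerOptions(new Configuration()).setSchema(schema));

        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector x = (LongColumnVector) batch.cols[0];
        for (long row = 0; row < 10000; ++row) {
          x.vector[batch.size++] = row;
          if (batch.size == batch.getMaxSize()) {   // flush each full batch
            writer.addRowBatch(batch);
            batch.reset();
          }
        }
        if (batch.size != 0) {                      // flush the last partial batch
          writer.addRowBatch(batch);
        }
        writer.addUserMetadata("written.by",
            ByteBuffer.wrap("WriterExample".getBytes(StandardCharsets.UTF_8)));
        writer.close();
        // Row count and raw data size are only final after close().
        System.out.println(writer.getNumberOfRows() + " rows, "
            + writer.getRawDataSize() + " bytes");
      }
    }
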
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/AcidStats.java b/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
new file mode 100644
index 0000000..aff9659
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+/**
+ * Statistics about the ACID operations in an ORC file
+ */
+public class AcidStats {
+ public long inserts;
+ public long updates;
+ public long deletes;
+
+ public AcidStats() {
+ inserts = 0;
+ updates = 0;
+ deletes = 0;
+ }
+
+ public AcidStats(String serialized) {
+ String[] parts = serialized.split(",");
+ inserts = Long.parseLong(parts[0]);
+ updates = Long.parseLong(parts[1]);
+ deletes = Long.parseLong(parts[2]);
+ }
+
+ public String serialize() {
+ StringBuilder builder = new StringBuilder();
+ builder.append(inserts);
+ builder.append(",");
+ builder.append(updates);
+ builder.append(",");
+ builder.append(deletes);
+ return builder.toString();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append(" inserts: ").append(inserts);
+ builder.append(" updates: ").append(updates);
+ builder.append(" deletes: ").append(deletes);
+ return builder.toString();
+ }
+}
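
The serialized form is a simple comma-separated triple, so a round trip looks like this:

    import org.apache.hive.orc.impl.AcidStats;

    public class AcidStatsExample {
      public static void main(String[] args) {
        AcidStats stats = new AcidStats();
        stats.inserts = 100;
        stats.updates = 5;
        stats.deletes = 2;
        String encoded = stats.serialize();          // "100,5,2"
        AcidStats decoded = new AcidStats(encoded);  // parsed back from the CSV form
        System.out.println(decoded);                 //  inserts: 100 updates: 5 deletes: 2
      }
    }
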
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
new file mode 100644
index 0000000..264061d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
+public class BitFieldReader {
+ private final RunLengthByteReader input;
+ /** The number of bits in one item. Non-test code always uses 1. */
+ private final int bitSize;
+ private int current;
+ private int bitsLeft;
+ private final int mask;
+
+ public BitFieldReader(InStream input,
+ int bitSize) throws IOException {
+ this.input = new RunLengthByteReader(input);
+ this.bitSize = bitSize;
+ mask = (1 << bitSize) - 1;
+ }
+
+ public void setInStream(InStream inStream) {
+ this.input.setInStream(inStream);
+ }
+
+ private void readByte() throws IOException {
+ if (input.hasNext()) {
+ current = 0xff & input.next();
+ bitsLeft = 8;
+ } else {
+ throw new EOFException("Read past end of bit field from " + this);
+ }
+ }
+
+ public int next() throws IOException {
+ int result = 0;
+ int bitsLeftToRead = bitSize;
+ while (bitsLeftToRead > bitsLeft) {
+ result <<= bitsLeft;
+ result |= current & ((1 << bitsLeft) - 1);
+ bitsLeftToRead -= bitsLeft;
+ readByte();
+ }
+ if (bitsLeftToRead > 0) {
+ result <<= bitsLeftToRead;
+ bitsLeft -= bitsLeftToRead;
+ result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+ }
+ return result & mask;
+ }
+
+ /**
+ * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
+ * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
+ * However, it means we'd need to keep track of how many bytes we read, and next/nextVector won't
+ * work anymore once this is called. This is trivial to fix, but these calls are never interspersed.
+ */
+ private boolean lastRunValue;
+ private int lastRunLength = -1;
+ private void readNextRun(int maxRunLength) throws IOException {
+ assert bitSize == 1;
+ if (lastRunLength > 0) return; // last run is not exhausted yet
+ if (bitsLeft == 0) {
+ readByte();
+ }
+ // First take care of the partial bits.
+ boolean hasVal = false;
+ int runLength = 0;
+ if (bitsLeft != 8) {
+ int partialBitsMask = (1 << bitsLeft) - 1;
+ int partialBits = current & partialBitsMask;
+ if (partialBits == partialBitsMask || partialBits == 0) {
+ lastRunValue = (partialBits == partialBitsMask);
+ if (maxRunLength <= bitsLeft) {
+ lastRunLength = maxRunLength;
+ return;
+ }
+ maxRunLength -= bitsLeft;
+ hasVal = true;
+ runLength = bitsLeft;
+ bitsLeft = 0;
+ } else {
+ // There's no run in partial bits. Return whatever we have.
+ int prefixBitsCount = 32 - bitsLeft;
+ runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
+ lastRunValue = (runLength > 0);
+ lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
+ (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
+ return;
+ }
+ assert bitsLeft == 0;
+ readByte();
+ }
+ if (!hasVal) {
+ lastRunValue = ((current >> 7) == 1);
+ hasVal = true;
+ }
+ // Read full bytes until the run ends.
+ assert bitsLeft == 8;
+ while (maxRunLength >= 8
+ && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
+ runLength += 8;
+ maxRunLength -= 8;
+ readByte();
+ }
+ if (maxRunLength > 0) {
+ int extraBits = Integer.numberOfLeadingZeros(
+ lastRunValue ? (~(current | ~255)) : current) - 24;
+ bitsLeft -= extraBits;
+ runLength += extraBits;
+ }
+ lastRunLength = runLength;
+ }
+
+ public void nextVector(LongColumnVector previous,
+ long previousLen) throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (previous.noNulls || !previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int types in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+
+ // The default value for nulls in Vectorization for int types is 1
+ // and given that a non-null value can also be 1, we need to check isNull as well
+ // when determining the isRepeating flag.
+ if (previous.isRepeating
+ && i > 0
+ && ((previous.vector[0] != previous.vector[i]) ||
+ (previous.isNull[0] != previous.isNull[i]))) {
+ previous.isRepeating = false;
+ }
+ }
+ }
+
+ public void seek(PositionProvider index) throws IOException {
+ input.seek(index);
+ int consumed = (int) index.getNext();
+ if (consumed > 8) {
+ throw new IllegalArgumentException("Seek past end of byte at " +
+ consumed + " in " + input);
+ } else if (consumed != 0) {
+ readByte();
+ bitsLeft = 8 - consumed;
+ } else {
+ bitsLeft = 0;
+ }
+ }
+
+ public void skip(long items) throws IOException {
+ long totalBits = bitSize * items;
+ if (bitsLeft >= totalBits) {
+ bitsLeft -= totalBits;
+ } else {
+ totalBits -= bitsLeft;
+ input.skip(totalBits / 8);
+ current = input.next();
+ bitsLeft = (int) (8 - (totalBits % 8));
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "bit reader current: " + current + " bits left: " + bitsLeft +
+ " bit size: " + bitSize + " from " + input;
+ }
+
+ boolean hasFullByte() {
+ return bitsLeft == 8 || bitsLeft == 0;
+ }
+
+ int peekOneBit() throws IOException {
+ assert bitSize == 1;
+ if (bitsLeft == 0) {
+ readByte();
+ }
+ return (current >>> (bitsLeft - 1)) & 1;
+ }
+
+ int peekFullByte() throws IOException {
+ assert bitSize == 1;
+ assert bitsLeft == 8 || bitsLeft == 0;
+ if (bitsLeft == 0) {
+ readByte();
+ }
+ return current;
+ }
+
+ void skipInCurrentByte(int bits) throws IOException {
+ assert bitsLeft >= bits;
+ bitsLeft -= bits;
+ }
+}
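
The run detection in readNextRun() leans on Integer.numberOfLeadingZeros to count how many of the top bits of the current byte repeat. A standalone illustration of that bit trick (plain arithmetic, not ORC code):

    public class LeadingRunDemo {
      public static void main(String[] args) {
        int current = 0b1110_0101;   // pretend this is the current byte, bitsLeft == 8
        // Run of 1s at the top: invert, mask to 8 bits, subtract the 24 extra leading zeros of an int.
        int onesRun = Integer.numberOfLeadingZeros(~(current | ~255)) - 24;
        System.out.println(onesRun);                 // 3
        // Run of 0s at the top works on the byte directly.
        int zerosRun = Integer.numberOfLeadingZeros(0b0001_1010) - 24;
        System.out.println(zerosRun);                // 3
      }
    }
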
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
new file mode 100644
index 0000000..962f035
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+public class BitFieldWriter {
+ private RunLengthByteWriter output;
+ private final int bitSize;
+ private byte current = 0;
+ private int bitsLeft = 8;
+
+ public BitFieldWriter(PositionedOutputStream output,
+ int bitSize) throws IOException {
+ this.output = new RunLengthByteWriter(output);
+ this.bitSize = bitSize;
+ }
+
+ private void writeByte() throws IOException {
+ output.write(current);
+ current = 0;
+ bitsLeft = 8;
+ }
+
+ public void flush() throws IOException {
+ if (bitsLeft != 8) {
+ writeByte();
+ }
+ output.flush();
+ }
+
+ public void write(int value) throws IOException {
+ int bitsToWrite = bitSize;
+ while (bitsToWrite > bitsLeft) {
+ // add the bits to the bottom of the current word
+ current |= value >>> (bitsToWrite - bitsLeft);
+ // subtract out the bits we just added
+ bitsToWrite -= bitsLeft;
+ // zero out the bits above bitsToWrite
+ value &= (1 << bitsToWrite) - 1;
+ writeByte();
+ }
+ bitsLeft -= bitsToWrite;
+ current |= value << bitsLeft;
+ if (bitsLeft == 0) {
+ writeByte();
+ }
+ }
+
+ public void getPosition(PositionRecorder recorder) throws IOException {
+ output.getPosition(recorder);
+ recorder.addPosition(8 - bitsLeft);
+ }
+}
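
BitFieldWriter.write() packs values most-significant-bit first into each byte, and BitFieldReader.next() shifts them back out the same way. A self-contained model of that layout for bitSize == 2, ignoring the run-length byte layer underneath:

    public class BitPackDemo {
      public static void main(String[] args) {
        int bitSize = 2;
        int[] values = {3, 0, 2, 1};            // each value fits in 2 bits

        // Pack MSB-first, the way write() fills 'current' (single-byte case).
        int current = 0, bitsLeft = 8;
        for (int v : values) {
          bitsLeft -= bitSize;
          current |= v << bitsLeft;
        }
        System.out.println(Integer.toBinaryString(current));   // 11001001

        // Unpack, the way next() shifts values back out.
        int mask = (1 << bitSize) - 1;
        for (int i = 0; i < values.length; ++i) {
          int shift = 8 - bitSize * (i + 1);
          System.out.print(((current >>> shift) & mask) + " "); // 3 0 2 1
        }
        System.out.println();
      }
    }
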
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java b/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
new file mode 100644
index 0000000..0f59b5d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
@@ -0,0 +1,85 @@
+package org.apache.hive.orc.impl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+
+/**
+ * The sections of the stripe that we have read.
+ * This might not match diskRange: one disk range can map to multiple buffer chunks,
+ * depending on DFS block boundaries.
+ */
+public class BufferChunk extends DiskRangeList {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(BufferChunk.class);
+ final ByteBuffer chunk;
+
+ public BufferChunk(ByteBuffer chunk, long offset) {
+ super(offset, offset + chunk.remaining());
+ this.chunk = chunk;
+ }
+
+ public ByteBuffer getChunk() {
+ return chunk;
+ }
+
+ @Override
+ public boolean hasData() {
+ return chunk != null;
+ }
+
+ @Override
+ public final String toString() {
+ boolean makesSense = chunk.remaining() == (end - offset);
+ return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
+ + (makesSense ? "" : "(!)") + " type: " +
+ (chunk.isDirect() ? "direct" : "array-backed");
+ }
+
+ @Override
+ public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
+ assert offset <= end && offset >= this.offset && end <= this.end;
+ assert offset + shiftBy >= 0;
+ ByteBuffer sliceBuf = chunk.slice();
+ int newPos = (int) (offset - this.offset);
+ int newLimit = newPos + (int) (end - offset);
+ try {
+ sliceBuf.position(newPos);
+ sliceBuf.limit(newLimit);
+ } catch (Throwable t) {
+ LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
+ + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
+ + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
+ + t.getClass());
+ throw new RuntimeException(t);
+ }
+ return new BufferChunk(sliceBuf, offset + shiftBy);
+ }
+
+ @Override
+ public ByteBuffer getData() {
+ return chunk;
+ }
+}
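
A small sketch of how a chunk maps file offsets onto a ByteBuffer and how sliceAndShift rebases a sub-range; getOffset()/getEnd() are assumed from the hive-common DiskRange base class.

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hive.common.io.DiskRange;
    import org.apache.hive.orc.impl.BufferChunk;

    public class BufferChunkExample {
      public static void main(String[] args) {
        byte[] raw = new byte[100];
        // A chunk whose bytes cover file offsets [1000, 1100).
        BufferChunk chunk = new BufferChunk(ByteBuffer.wrap(raw), 1000);
        System.out.println(chunk);  // data range [1000, 1100), size: 100 type: array-backed

        // Take the sub-range [1020, 1050) and rebase it to start at offset 20.
        DiskRange slice = chunk.sliceAndShift(1020, 1050, -1000);
        System.out.println(slice.getOffset() + ".." + slice.getEnd());  // 20..50
      }
    }
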
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
new file mode 100644
index 0000000..9cf725d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
@@ -0,0 +1,1101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hive.orc.BinaryColumnStatistics;
+import org.apache.hive.orc.BooleanColumnStatistics;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.DecimalColumnStatistics;
+import org.apache.hive.orc.DoubleColumnStatistics;
+import org.apache.hive.orc.IntegerColumnStatistics;
+import org.apache.hive.orc.TimestampColumnStatistics;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.DateColumnStatistics;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StringColumnStatistics;
+
+public class ColumnStatisticsImpl implements ColumnStatistics {
+
+ private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
+ implements BooleanColumnStatistics {
+ private long trueCount = 0;
+
+ BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
+ trueCount = bkt.getCount(0);
+ }
+
+ BooleanStatisticsImpl() {
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ trueCount = 0;
+ }
+
+ @Override
+ public void updateBoolean(boolean value, int repetitions) {
+ if (value) {
+ trueCount += repetitions;
+ }
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof BooleanStatisticsImpl) {
+ BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+ trueCount += bkt.trueCount;
+ } else {
+ if (isStatsExists() && trueCount != 0) {
+ throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder builder = super.serialize();
+ OrcProto.BucketStatistics.Builder bucket =
+ OrcProto.BucketStatistics.newBuilder();
+ bucket.addCount(trueCount);
+ builder.setBucketStatistics(bucket);
+ return builder;
+ }
+
+ @Override
+ public long getFalseCount() {
+ return getNumberOfValues() - trueCount;
+ }
+
+ @Override
+ public long getTrueCount() {
+ return trueCount;
+ }
+
+ @Override
+ public String toString() {
+ return super.toString() + " true: " + trueCount;
+ }
+ }
+
+ private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
+ implements IntegerColumnStatistics {
+
+ private long minimum = Long.MAX_VALUE;
+ private long maximum = Long.MIN_VALUE;
+ private long sum = 0;
+ private boolean hasMinimum = false;
+ private boolean overflow = false;
+
+ IntegerStatisticsImpl() {
+ }
+
+ IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
+ if (intStat.hasMinimum()) {
+ hasMinimum = true;
+ minimum = intStat.getMinimum();
+ }
+ if (intStat.hasMaximum()) {
+ maximum = intStat.getMaximum();
+ }
+ if (intStat.hasSum()) {
+ sum = intStat.getSum();
+ } else {
+ overflow = true;
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ hasMinimum = false;
+ minimum = Long.MAX_VALUE;
+ maximum = Long.MIN_VALUE;
+ sum = 0;
+ overflow = false;
+ }
+
+ @Override
+ public void updateInteger(long value, int repetitions) {
+ if (!hasMinimum) {
+ hasMinimum = true;
+ minimum = value;
+ maximum = value;
+ } else if (value < minimum) {
+ minimum = value;
+ } else if (value > maximum) {
+ maximum = value;
+ }
+ if (!overflow) {
+ boolean wasPositive = sum >= 0;
+ sum += value * repetitions;
+ if ((value >= 0) == wasPositive) {
+ overflow = (sum >= 0) != wasPositive;
+ }
+ }
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof IntegerStatisticsImpl) {
+ IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+ if (!hasMinimum) {
+ hasMinimum = otherInt.hasMinimum;
+ minimum = otherInt.minimum;
+ maximum = otherInt.maximum;
+ } else if (otherInt.hasMinimum) {
+ if (otherInt.minimum < minimum) {
+ minimum = otherInt.minimum;
+ }
+ if (otherInt.maximum > maximum) {
+ maximum = otherInt.maximum;
+ }
+ }
+
+ overflow |= otherInt.overflow;
+ if (!overflow) {
+ boolean wasPositive = sum >= 0;
+ sum += otherInt.sum;
+ if ((otherInt.sum >= 0) == wasPositive) {
+ overflow = (sum >= 0) != wasPositive;
+ }
+ }
+ } else {
+ if (isStatsExists() && hasMinimum) {
+ throw new IllegalArgumentException("Incompatible merging of integer column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder builder = super.serialize();
+ OrcProto.IntegerStatistics.Builder intb =
+ OrcProto.IntegerStatistics.newBuilder();
+ if (hasMinimum) {
+ intb.setMinimum(minimum);
+ intb.setMaximum(maximum);
+ }
+ if (!overflow) {
+ intb.setSum(sum);
+ }
+ builder.setIntStatistics(intb);
+ return builder;
+ }
+
+ @Override
+ public long getMinimum() {
+ return minimum;
+ }
+
+ @Override
+ public long getMaximum() {
+ return maximum;
+ }
+
+ @Override
+ public boolean isSumDefined() {
+ return !overflow;
+ }
+
+ @Override
+ public long getSum() {
+ return sum;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (hasMinimum) {
+ buf.append(" min: ");
+ buf.append(minimum);
+ buf.append(" max: ");
+ buf.append(maximum);
+ }
+ if (!overflow) {
+ buf.append(" sum: ");
+ buf.append(sum);
+ }
+ return buf.toString();
+ }
+ }
+
+ private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
+ implements DoubleColumnStatistics {
+ private boolean hasMinimum = false;
+ private double minimum = Double.MAX_VALUE;
+ private double maximum = Double.MIN_VALUE;
+ private double sum = 0;
+
+ DoubleStatisticsImpl() {
+ }
+
+ DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
+ if (dbl.hasMinimum()) {
+ hasMinimum = true;
+ minimum = dbl.getMinimum();
+ }
+ if (dbl.hasMaximum()) {
+ maximum = dbl.getMaximum();
+ }
+ if (dbl.hasSum()) {
+ sum = dbl.getSum();
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ hasMinimum = false;
+ minimum = Double.MAX_VALUE;
+ maximum = Double.MIN_VALUE;
+ sum = 0;
+ }
+
+ @Override
+ public void updateDouble(double value) {
+ if (!hasMinimum) {
+ hasMinimum = true;
+ minimum = value;
+ maximum = value;
+ } else if (value < minimum) {
+ minimum = value;
+ } else if (value > maximum) {
+ maximum = value;
+ }
+ sum += value;
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof DoubleStatisticsImpl) {
+ DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+ if (!hasMinimum) {
+ hasMinimum = dbl.hasMinimum;
+ minimum = dbl.minimum;
+ maximum = dbl.maximum;
+ } else if (dbl.hasMinimum) {
+ if (dbl.minimum < minimum) {
+ minimum = dbl.minimum;
+ }
+ if (dbl.maximum > maximum) {
+ maximum = dbl.maximum;
+ }
+ }
+ sum += dbl.sum;
+ } else {
+ if (isStatsExists() && hasMinimum) {
+ throw new IllegalArgumentException("Incompatible merging of double column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder builder = super.serialize();
+ OrcProto.DoubleStatistics.Builder dbl =
+ OrcProto.DoubleStatistics.newBuilder();
+ if (hasMinimum) {
+ dbl.setMinimum(minimum);
+ dbl.setMaximum(maximum);
+ }
+ dbl.setSum(sum);
+ builder.setDoubleStatistics(dbl);
+ return builder;
+ }
+
+ @Override
+ public double getMinimum() {
+ return minimum;
+ }
+
+ @Override
+ public double getMaximum() {
+ return maximum;
+ }
+
+ @Override
+ public double getSum() {
+ return sum;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (hasMinimum) {
+ buf.append(" min: ");
+ buf.append(minimum);
+ buf.append(" max: ");
+ buf.append(maximum);
+ }
+ buf.append(" sum: ");
+ buf.append(sum);
+ return buf.toString();
+ }
+ }
+
+ protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
+ implements StringColumnStatistics {
+ private Text minimum = null;
+ private Text maximum = null;
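+ // Total length in bytes of all values seen, not an arithmetic sum.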
+ private long sum = 0;
+
+ StringStatisticsImpl() {
+ }
+
+ StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.StringStatistics str = stats.getStringStatistics();
+ if (str.hasMaximum()) {
+ maximum = new Text(str.getMaximum());
+ }
+ if (str.hasMinimum()) {
+ minimum = new Text(str.getMinimum());
+ }
+ if (str.hasSum()) {
+ sum = str.getSum();
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ minimum = null;
+ maximum = null;
+ sum = 0;
+ }
+
+ @Override
+ public void updateString(Text value) {
+ if (minimum == null) {
+ maximum = minimum = new Text(value);
+ } else if (minimum.compareTo(value) > 0) {
+ minimum = new Text(value);
+ } else if (maximum.compareTo(value) < 0) {
+ maximum = new Text(value);
+ }
+ sum += value.getLength();
+ }
+
+ @Override
+ public void updateString(byte[] bytes, int offset, int length,
+ int repetitions) {
+ if (minimum == null) {
+ maximum = minimum = new Text();
+ maximum.set(bytes, offset, length);
+ } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
+ minimum.getLength(), bytes, offset, length) > 0) {
+ minimum = new Text();
+ minimum.set(bytes, offset, length);
+ } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
+ maximum.getLength(), bytes, offset, length) < 0) {
+ maximum = new Text();
+ maximum.set(bytes, offset, length);
+ }
+ // Widen before multiplying to avoid int overflow for long, highly repeated values.
+ sum += (long) length * repetitions;
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof StringStatisticsImpl) {
+ StringStatisticsImpl str = (StringStatisticsImpl) other;
+ if (minimum == null) {
+ if (str.minimum != null) {
+ maximum = new Text(str.getMaximum());
+ minimum = new Text(str.getMinimum());
+ } else {
+ /* both are empty */
+ maximum = minimum = null;
+ }
+ } else if (str.minimum != null) {
+ if (minimum.compareTo(str.minimum) > 0) {
+ minimum = new Text(str.getMinimum());
+ }
+ if (maximum.compareTo(str.maximum) < 0) {
+ maximum = new Text(str.getMaximum());
+ }
+ }
+ sum += str.sum;
+ } else {
+ if (isStatsExists() && minimum != null) {
+ throw new IllegalArgumentException("Incompatible merging of string column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.StringStatistics.Builder str =
+ OrcProto.StringStatistics.newBuilder();
+ if (getNumberOfValues() != 0) {
+ str.setMinimum(getMinimum());
+ str.setMaximum(getMaximum());
+ str.setSum(sum);
+ }
+ result.setStringStatistics(str);
+ return result;
+ }
+
+ @Override
+ public String getMinimum() {
+ return minimum == null ? null : minimum.toString();
+ }
+
+ @Override
+ public String getMaximum() {
+ return maximum == null ? null : maximum.toString();
+ }
+
+ @Override
+ public long getSum() {
+ return sum;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" min: ");
+ buf.append(getMinimum());
+ buf.append(" max: ");
+ buf.append(getMaximum());
+ buf.append(" sum: ");
+ buf.append(sum);
+ }
+ return buf.toString();
+ }
+ }
+
+ protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
+ BinaryColumnStatistics {
+
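+ // Total number of bytes across all binary values.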
+ private long sum = 0;
+
+ BinaryStatisticsImpl() {
+ }
+
+ BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
+ if (binStats.hasSum()) {
+ sum = binStats.getSum();
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ sum = 0;
+ }
+
+ @Override
+ public void updateBinary(BytesWritable value) {
+ sum += value.getLength();
+ }
+
+ @Override
+ public void updateBinary(byte[] bytes, int offset, int length,
+ int repetitions) {
+ // Widen before multiplying to avoid int overflow for long, highly repeated values.
+ sum += (long) length * repetitions;
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof BinaryColumnStatistics) {
+ BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+ sum += bin.sum;
+ } else {
+ if (isStatsExists() && sum != 0) {
+ throw new IllegalArgumentException("Incompatible merging of binary column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public long getSum() {
+ return sum;
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
+ bin.setSum(sum);
+ result.setBinaryStatistics(bin);
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" sum: ");
+ buf.append(sum);
+ }
+ return buf.toString();
+ }
+ }
+
+ private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
+ implements DecimalColumnStatistics {
+
+ // These objects are mutable for better performance.
+ private HiveDecimalWritable minimum = null;
+ private HiveDecimalWritable maximum = null;
+ private HiveDecimalWritable sum = new HiveDecimalWritable(0);
+
+ DecimalStatisticsImpl() {
+ }
+
+ DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
+ if (dec.hasMaximum()) {
+ maximum = new HiveDecimalWritable(dec.getMaximum());
+ }
+ if (dec.hasMinimum()) {
+ minimum = new HiveDecimalWritable(dec.getMinimum());
+ }
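+ // A file with no recorded sum leaves it undefined, so keep it null.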
+ if (dec.hasSum()) {
+ sum = new HiveDecimalWritable(dec.getSum());
+ } else {
+ sum = null;
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ minimum = null;
+ maximum = null;
+ sum = new HiveDecimalWritable(0);
+ }
+
+ @Override
+ public void updateDecimal(HiveDecimalWritable value) {
+ if (minimum == null) {
+ minimum = new HiveDecimalWritable(value);
+ maximum = new HiveDecimalWritable(value);
+ } else if (minimum.compareTo(value) > 0) {
+ minimum.set(value);
+ } else if (maximum.compareTo(value) < 0) {
+ maximum.set(value);
+ }
+ if (sum != null) {
+ sum.mutateAdd(value);
+ }
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof DecimalStatisticsImpl) {
+ DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
+ if (minimum == null) {
+ minimum = (dec.minimum != null ? new HiveDecimalWritable(dec.minimum) : null);
+ maximum = (dec.maximum != null ? new HiveDecimalWritable(dec.maximum) : null);
+ // Copy rather than alias the other writable, since sum is mutated in place.
+ sum = (dec.sum != null ? new HiveDecimalWritable(dec.sum) : null);
+ } else if (dec.minimum != null) {
+ if (minimum.compareTo(dec.minimum) > 0) {
+ minimum.set(dec.minimum);
+ }
+ if (maximum.compareTo(dec.maximum) < 0) {
+ maximum.set(dec.maximum);
+ }
+ if (sum == null || dec.sum == null) {
+ sum = null;
+ } else {
+ sum.mutateAdd(dec.sum);
+ }
+ }
+ } else {
+ if (isStatsExists() && minimum != null) {
+ throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.DecimalStatistics.Builder dec =
+ OrcProto.DecimalStatistics.newBuilder();
+ if (getNumberOfValues() != 0 && minimum != null) {
+ dec.setMinimum(minimum.toString());
+ dec.setMaximum(maximum.toString());
+ }
+ // isSet() returns false once the running sum has overflowed, so omit it.
+ if (sum != null && sum.isSet()) {
+ dec.setSum(sum.toString());
+ }
+ result.setDecimalStatistics(dec);
+ return result;
+ }
+
+ @Override
+ public HiveDecimal getMinimum() {
+ return minimum == null ? null : minimum.getHiveDecimal();
+ }
+
+ @Override
+ public HiveDecimal getMaximum() {
+ return maximum == null ? null : maximum.getHiveDecimal();
+ }
+
+ @Override
+ public HiveDecimal getSum() {
+ return sum == null ? null : sum.getHiveDecimal();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" min: ");
+ buf.append(minimum);
+ buf.append(" max: ");
+ buf.append(maximum);
+ if (sum != null) {
+ buf.append(" sum: ");
+ buf.append(sum);
+ }
+ }
+ return buf.toString();
+ }
+ }
+
+ private static final class DateStatisticsImpl extends ColumnStatisticsImpl
+ implements DateColumnStatistics {
+ private Integer minimum = null;
+ private Integer maximum = null;
+
+ DateStatisticsImpl() {
+ }
+
+ DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.DateStatistics dateStats = stats.getDateStatistics();
+ // min,max values serialized/deserialized as int (days since epoch)
+ if (dateStats.hasMaximum()) {
+ maximum = dateStats.getMaximum();
+ }
+ if (dateStats.hasMinimum()) {
+ minimum = dateStats.getMinimum();
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ minimum = null;
+ maximum = null;
+ }
+
+ @Override
+ public void updateDate(DateWritable value) {
+ if (minimum == null) {
+ minimum = value.getDays();
+ maximum = value.getDays();
+ } else if (minimum > value.getDays()) {
+ minimum = value.getDays();
+ } else if (maximum < value.getDays()) {
+ maximum = value.getDays();
+ }
+ }
+
+ @Override
+ public void updateDate(int value) {
+ if (minimum == null) {
+ minimum = value;
+ maximum = value;
+ } else if (minimum > value) {
+ minimum = value;
+ } else if (maximum < value) {
+ maximum = value;
+ }
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof DateStatisticsImpl) {
+ DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
+ if (minimum == null) {
+ minimum = dateStats.minimum;
+ maximum = dateStats.maximum;
+ } else if (dateStats.minimum != null) {
+ if (minimum > dateStats.minimum) {
+ minimum = dateStats.minimum;
+ }
+ if (maximum < dateStats.maximum) {
+ maximum = dateStats.maximum;
+ }
+ }
+ } else {
+ if (isStatsExists() && minimum != null) {
+ throw new IllegalArgumentException("Incompatible merging of date column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.DateStatistics.Builder dateStats =
+ OrcProto.DateStatistics.newBuilder();
+ if (getNumberOfValues() != 0 && minimum != null) {
+ dateStats.setMinimum(minimum);
+ dateStats.setMaximum(maximum);
+ }
+ result.setDateStatistics(dateStats);
+ return result;
+ }
+
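+ // Scratch writables reused when converting the stored day offsets back to java.sql.Date.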
+ private transient final DateWritable minDate = new DateWritable();
+ private transient final DateWritable maxDate = new DateWritable();
+
+ @Override
+ public Date getMinimum() {
+ if (minimum == null) {
+ return null;
+ }
+ minDate.set(minimum);
+ return minDate.get();
+ }
+
+ @Override
+ public Date getMaximum() {
+ if (maximum == null) {
+ return null;
+ }
+ maxDate.set(maximum);
+ return maxDate.get();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" min: ");
+ buf.append(getMinimum());
+ buf.append(" max: ");
+ buf.append(getMaximum());
+ }
+ return buf.toString();
+ }
+ }
+
+ private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
+ implements TimestampColumnStatistics {
+ private Long minimum = null;
+ private Long maximum = null;
+
+ TimestampStatisticsImpl() {
+ }
+
+ TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
+ // min,max values serialized/deserialized as long (milliseconds since epoch)
+ if (timestampStats.hasMaximum()) {
+ maximum = timestampStats.getMaximum();
+ }
+ if (timestampStats.hasMinimum()) {
+ minimum = timestampStats.getMinimum();
+ }
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ minimum = null;
+ maximum = null;
+ }
+
+ @Override
+ public void updateTimestamp(Timestamp value) {
+ if (minimum == null) {
+ minimum = value.getTime();
+ maximum = value.getTime();
+ } else if (minimum > value.getTime()) {
+ minimum = value.getTime();
+ } else if (maximum < value.getTime()) {
+ maximum = value.getTime();
+ }
+ }
+
+ @Override
+ public void updateTimestamp(long value) {
+ if (minimum == null) {
+ minimum = value;
+ maximum = value;
+ } else if (minimum > value) {
+ minimum = value;
+ } else if (maximum < value) {
+ maximum = value;
+ }
+ }
+
+ @Override
+ public void merge(ColumnStatisticsImpl other) {
+ if (other instanceof TimestampStatisticsImpl) {
+ TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
+ if (minimum == null) {
+ minimum = timestampStats.minimum;
+ maximum = timestampStats.maximum;
+ } else if (timestampStats.minimum != null) {
+ if (minimum > timestampStats.minimum) {
+ minimum = timestampStats.minimum;
+ }
+ if (maximum < timestampStats.maximum) {
+ maximum = timestampStats.maximum;
+ }
+ }
+ } else {
+ if (isStatsExists() && minimum != null) {
+ throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
+ }
+ }
+ super.merge(other);
+ }
+
+ @Override
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
+ .newBuilder();
+ if (getNumberOfValues() != 0 && minimum != null) {
+ timestampStats.setMinimum(minimum);
+ timestampStats.setMaximum(maximum);
+ }
+ result.setTimestampStatistics(timestampStats);
+ return result;
+ }
+
+ @Override
+ public Timestamp getMinimum() {
+ return minimum == null ? null : new Timestamp(minimum);
+ }
+
+ @Override
+ public Timestamp getMaximum() {
+ return maximum == null ? null : new Timestamp(maximum);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" min: ");
+ buf.append(getMinimum());
+ buf.append(" max: ");
+ buf.append(getMaximum());
+ }
+ return buf.toString();
+ }
+ }
+
+ private long count = 0;
+ private boolean hasNull = false;
+
+ ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ if (stats.hasNumberOfValues()) {
+ count = stats.getNumberOfValues();
+ }
+
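+ // Files that do not record hasNull give no information, so conservatively
+ // assume the column may contain nulls.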
+ if (stats.hasHasNull()) {
+ hasNull = stats.getHasNull();
+ } else {
+ hasNull = true;
+ }
+ }
+
+ ColumnStatisticsImpl() {
+ }
+
+ public void increment() {
+ count += 1;
+ }
+
+ public void increment(int count) {
+ this.count += count;
+ }
+
+ public void setNull() {
+ hasNull = true;
+ }
+
+ public void updateBoolean(boolean value, int repetitions) {
+ throw new UnsupportedOperationException("Can't update boolean");
+ }
+
+ public void updateInteger(long value, int repetitions) {
+ throw new UnsupportedOperationException("Can't update integer");
+ }
+
+ public void updateDouble(double value) {
+ throw new UnsupportedOperationException("Can't update double");
+ }
+
+ public void updateString(Text value) {
+ throw new UnsupportedOperationException("Can't update string");
+ }
+
+ public void updateString(byte[] bytes, int offset, int length,
+ int repetitions) {
+ throw new UnsupportedOperationException("Can't update string");
+ }
+
+ public void updateBinary(BytesWritable value) {
+ throw new UnsupportedOperationException("Can't update binary");
+ }
+
+ public void updateBinary(byte[] bytes, int offset, int length,
+ int repetitions) {
+ throw new UnsupportedOperationException("Can't update string");
+ }
+
+ public void updateDecimal(HiveDecimalWritable value) {
+ throw new UnsupportedOperationException("Can't update decimal");
+ }
+
+ public void updateDate(DateWritable value) {
+ throw new UnsupportedOperationException("Can't update date");
+ }
+
+ public void updateDate(int value) {
+ throw new UnsupportedOperationException("Can't update date");
+ }
+
+ public void updateTimestamp(Timestamp value) {
+ throw new UnsupportedOperationException("Can't update timestamp");
+ }
+
+ public void updateTimestamp(long value) {
+ throw new UnsupportedOperationException("Can't update timestamp");
+ }
+
+ public boolean isStatsExists() {
+ return count > 0 || hasNull;
+ }
+
+ public void merge(ColumnStatisticsImpl stats) {
+ count += stats.count;
+ hasNull |= stats.hasNull;
+ }
+
+ public void reset() {
+ count = 0;
+ hasNull = false;
+ }
+
+ @Override
+ public long getNumberOfValues() {
+ return count;
+ }
+
+ @Override
+ public boolean hasNull() {
+ return hasNull;
+ }
+
+ @Override
+ public String toString() {
+ return "count: " + count + " hasNull: " + hasNull;
+ }
+
+ public OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder builder =
+ OrcProto.ColumnStatistics.newBuilder();
+ builder.setNumberOfValues(count);
+ builder.setHasNull(hasNull);
+ return builder;
+ }
+
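+ // Create a statistics collector for the column's category; complex types fall
+ // through to the base implementation, which only tracks count and hasNull.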
+ public static ColumnStatisticsImpl create(TypeDescription schema) {
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ return new BooleanStatisticsImpl();
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new IntegerStatisticsImpl();
+ case FLOAT:
+ case DOUBLE:
+ return new DoubleStatisticsImpl();
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ return new StringStatisticsImpl();
+ case DECIMAL:
+ return new DecimalStatisticsImpl();
+ case DATE:
+ return new DateStatisticsImpl();
+ case TIMESTAMP:
+ return new TimestampStatisticsImpl();
+ case BINARY:
+ return new BinaryStatisticsImpl();
+ default:
+ return new ColumnStatisticsImpl();
+ }
+ }
+
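+ // Rebuild the matching implementation based on which type-specific statistics
+ // message is present in the protobuf.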
+ public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+ if (stats.hasBucketStatistics()) {
+ return new BooleanStatisticsImpl(stats);
+ } else if (stats.hasIntStatistics()) {
+ return new IntegerStatisticsImpl(stats);
+ } else if (stats.hasDoubleStatistics()) {
+ return new DoubleStatisticsImpl(stats);
+ } else if (stats.hasStringStatistics()) {
+ return new StringStatisticsImpl(stats);
+ } else if (stats.hasDecimalStatistics()) {
+ return new DecimalStatisticsImpl(stats);
+ } else if (stats.hasDateStatistics()) {
+ return new DateStatisticsImpl(stats);
+ } else if (stats.hasTimestampStatistics()) {
+ return new TimestampStatisticsImpl(stats);
+ } else if (stats.hasBinaryStatistics()) {
+ return new BinaryStatisticsImpl(stats);
+ } else {
+ return new ColumnStatisticsImpl(stats);
+ }
+ }
+}