You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2017/02/03 16:38:18 UTC
[19/22] hive git commit: HIVE-14007. Replace hive-orc module with ORC 1.3.1
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeStatistics.java b/orc/src/java/org/apache/orc/StripeStatistics.java
deleted file mode 100644
index 8fc91cb..0000000
--- a/orc/src/java/org/apache/orc/StripeStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.orc.impl.ColumnStatisticsImpl;
-
-import java.util.List;
-
-public class StripeStatistics {
- private final List<OrcProto.ColumnStatistics> cs;
-
- public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
- this.cs = list;
- }
-
- /**
- * Return list of column statistics
- *
- * @return column stats
- */
- public ColumnStatistics[] getColumnStatistics() {
- ColumnStatistics[] result = new ColumnStatistics[cs.size()];
- for (int i = 0; i < result.length; ++i) {
- result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
- }
- return result;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
deleted file mode 100644
index 27dc49f..0000000
--- a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum value for the column.
- * @return minimum value
- */
- Timestamp getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return maximum value
- */
- Timestamp getMaximum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java
deleted file mode 100644
index 2e9328b..0000000
--- a/orc/src/java/org/apache/orc/TypeDescription.java
+++ /dev/null
@@ -1,870 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This is the description of the types in an ORC file.
- */
-public class TypeDescription
- implements Comparable<TypeDescription>, Serializable {
- private static final int MAX_PRECISION = 38;
- private static final int MAX_SCALE = 38;
- private static final int DEFAULT_PRECISION = 38;
- private static final int DEFAULT_SCALE = 10;
- private static final int DEFAULT_LENGTH = 256;
-
- @Override
- public int compareTo(TypeDescription other) {
- if (this == other) {
- return 0;
- } else if (other == null) {
- return -1;
- } else {
- int result = category.compareTo(other.category);
- if (result == 0) {
- switch (category) {
- case CHAR:
- case VARCHAR:
- return maxLength - other.maxLength;
- case DECIMAL:
- if (precision != other.precision) {
- return precision - other.precision;
- }
- return scale - other.scale;
- case UNION:
- case LIST:
- case MAP:
- if (children.size() != other.children.size()) {
- return children.size() - other.children.size();
- }
- for(int c=0; result == 0 && c < children.size(); ++c) {
- result = children.get(c).compareTo(other.children.get(c));
- }
- break;
- case STRUCT:
- if (children.size() != other.children.size()) {
- return children.size() - other.children.size();
- }
- for(int c=0; result == 0 && c < children.size(); ++c) {
- result = fieldNames.get(c).compareTo(other.fieldNames.get(c));
- if (result == 0) {
- result = children.get(c).compareTo(other.children.get(c));
- }
- }
- break;
- default:
- // PASS
- }
- }
- return result;
- }
- }
-
- public enum Category {
- BOOLEAN("boolean", true),
- BYTE("tinyint", true),
- SHORT("smallint", true),
- INT("int", true),
- LONG("bigint", true),
- FLOAT("float", true),
- DOUBLE("double", true),
- STRING("string", true),
- DATE("date", true),
- TIMESTAMP("timestamp", true),
- BINARY("binary", true),
- DECIMAL("decimal", true),
- VARCHAR("varchar", true),
- CHAR("char", true),
- LIST("array", false),
- MAP("map", false),
- STRUCT("struct", false),
- UNION("uniontype", false);
-
- Category(String name, boolean isPrimitive) {
- this.name = name;
- this.isPrimitive = isPrimitive;
- }
-
- final boolean isPrimitive;
- final String name;
-
- public boolean isPrimitive() {
- return isPrimitive;
- }
-
- public String getName() {
- return name;
- }
- }
-
- public static TypeDescription createBoolean() {
- return new TypeDescription(Category.BOOLEAN);
- }
-
- public static TypeDescription createByte() {
- return new TypeDescription(Category.BYTE);
- }
-
- public static TypeDescription createShort() {
- return new TypeDescription(Category.SHORT);
- }
-
- public static TypeDescription createInt() {
- return new TypeDescription(Category.INT);
- }
-
- public static TypeDescription createLong() {
- return new TypeDescription(Category.LONG);
- }
-
- public static TypeDescription createFloat() {
- return new TypeDescription(Category.FLOAT);
- }
-
- public static TypeDescription createDouble() {
- return new TypeDescription(Category.DOUBLE);
- }
-
- public static TypeDescription createString() {
- return new TypeDescription(Category.STRING);
- }
-
- public static TypeDescription createDate() {
- return new TypeDescription(Category.DATE);
- }
-
- public static TypeDescription createTimestamp() {
- return new TypeDescription(Category.TIMESTAMP);
- }
-
- public static TypeDescription createBinary() {
- return new TypeDescription(Category.BINARY);
- }
-
- public static TypeDescription createDecimal() {
- return new TypeDescription(Category.DECIMAL);
- }
-
- static class StringPosition {
- final String value;
- int position;
- final int length;
-
- StringPosition(String value) {
- this.value = value;
- position = 0;
- length = value.length();
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- buffer.append('\'');
- buffer.append(value.substring(0, position));
- buffer.append('^');
- buffer.append(value.substring(position));
- buffer.append('\'');
- return buffer.toString();
- }
- }
-
- static Category parseCategory(StringPosition source) {
- int start = source.position;
- while (source.position < source.length) {
- char ch = source.value.charAt(source.position);
- if (!Character.isLetter(ch)) {
- break;
- }
- source.position += 1;
- }
- if (source.position != start) {
- String word = source.value.substring(start, source.position).toLowerCase();
- for (Category cat : Category.values()) {
- if (cat.getName().equals(word)) {
- return cat;
- }
- }
- }
- throw new IllegalArgumentException("Can't parse category at " + source);
- }
-
- static int parseInt(StringPosition source) {
- int start = source.position;
- int result = 0;
- while (source.position < source.length) {
- char ch = source.value.charAt(source.position);
- if (!Character.isDigit(ch)) {
- break;
- }
- result = result * 10 + (ch - '0');
- source.position += 1;
- }
- if (source.position == start) {
- throw new IllegalArgumentException("Missing integer at " + source);
- }
- return result;
- }
-
- static String parseName(StringPosition source) {
- int start = source.position;
- while (source.position < source.length) {
- char ch = source.value.charAt(source.position);
- if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
- break;
- }
- source.position += 1;
- }
- if (source.position == start) {
- throw new IllegalArgumentException("Missing name at " + source);
- }
- return source.value.substring(start, source.position);
- }
-
- static void requireChar(StringPosition source, char required) {
- if (source.position >= source.length ||
- source.value.charAt(source.position) != required) {
- throw new IllegalArgumentException("Missing required char '" +
- required + "' at " + source);
- }
- source.position += 1;
- }
-
- static boolean consumeChar(StringPosition source, char ch) {
- boolean result = source.position < source.length &&
- source.value.charAt(source.position) == ch;
- if (result) {
- source.position += 1;
- }
- return result;
- }
-
- static void parseUnion(TypeDescription type, StringPosition source) {
- requireChar(source, '<');
- do {
- type.addUnionChild(parseType(source));
- } while (consumeChar(source, ','));
- requireChar(source, '>');
- }
-
- static void parseStruct(TypeDescription type, StringPosition source) {
- requireChar(source, '<');
- do {
- String fieldName = parseName(source);
- requireChar(source, ':');
- type.addField(fieldName, parseType(source));
- } while (consumeChar(source, ','));
- requireChar(source, '>');
- }
-
- static TypeDescription parseType(StringPosition source) {
- TypeDescription result = new TypeDescription(parseCategory(source));
- switch (result.getCategory()) {
- case BINARY:
- case BOOLEAN:
- case BYTE:
- case DATE:
- case DOUBLE:
- case FLOAT:
- case INT:
- case LONG:
- case SHORT:
- case STRING:
- case TIMESTAMP:
- break;
- case CHAR:
- case VARCHAR:
- requireChar(source, '(');
- result.withMaxLength(parseInt(source));
- requireChar(source, ')');
- break;
- case DECIMAL: {
- requireChar(source, '(');
- int precision = parseInt(source);
- requireChar(source, ',');
- result.withScale(parseInt(source));
- result.withPrecision(precision);
- requireChar(source, ')');
- break;
- }
- case LIST:
- requireChar(source, '<');
- result.children.add(parseType(source));
- requireChar(source, '>');
- break;
- case MAP:
- requireChar(source, '<');
- result.children.add(parseType(source));
- requireChar(source, ',');
- result.children.add(parseType(source));
- requireChar(source, '>');
- break;
- case UNION:
- parseUnion(result, source);
- break;
- case STRUCT:
- parseStruct(result, source);
- break;
- default:
- throw new IllegalArgumentException("Unknown type " +
- result.getCategory() + " at " + source);
- }
- return result;
- }
-
- /**
- * Parse TypeDescription from the Hive type names. This is the inverse
- * of TypeDescription.toString()
- * @param typeName the name of the type
- * @return a new TypeDescription or null if typeName was null
- * @throws IllegalArgumentException if the string is badly formed
- */
- public static TypeDescription fromString(String typeName) {
- if (typeName == null) {
- return null;
- }
- StringPosition source = new StringPosition(typeName);
- TypeDescription result = parseType(source);
- if (source.position != source.length) {
- throw new IllegalArgumentException("Extra characters at " + source);
- }
- return result;
- }
-
- /**
- * For decimal types, set the precision.
- * @param precision the new precision
- * @return this
- */
- public TypeDescription withPrecision(int precision) {
- if (category != Category.DECIMAL) {
- throw new IllegalArgumentException("precision is only allowed on decimal"+
- " and not " + category.name);
- } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
- throw new IllegalArgumentException("precision " + precision +
- " is out of range 1 .. " + scale);
- }
- this.precision = precision;
- return this;
- }
-
- /**
- * For decimal types, set the scale.
- * @param scale the new scale
- * @return this
- */
- public TypeDescription withScale(int scale) {
- if (category != Category.DECIMAL) {
- throw new IllegalArgumentException("scale is only allowed on decimal"+
- " and not " + category.name);
- } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
- throw new IllegalArgumentException("scale is out of range at " + scale);
- }
- this.scale = scale;
- return this;
- }
-
- public static TypeDescription createVarchar() {
- return new TypeDescription(Category.VARCHAR);
- }
-
- public static TypeDescription createChar() {
- return new TypeDescription(Category.CHAR);
- }
-
- /**
- * Set the maximum length for char and varchar types.
- * @param maxLength the maximum value
- * @return this
- */
- public TypeDescription withMaxLength(int maxLength) {
- if (category != Category.VARCHAR && category != Category.CHAR) {
- throw new IllegalArgumentException("maxLength is only allowed on char" +
- " and varchar and not " + category.name);
- }
- this.maxLength = maxLength;
- return this;
- }
-
- public static TypeDescription createList(TypeDescription childType) {
- TypeDescription result = new TypeDescription(Category.LIST);
- result.children.add(childType);
- childType.parent = result;
- return result;
- }
-
- public static TypeDescription createMap(TypeDescription keyType,
- TypeDescription valueType) {
- TypeDescription result = new TypeDescription(Category.MAP);
- result.children.add(keyType);
- result.children.add(valueType);
- keyType.parent = result;
- valueType.parent = result;
- return result;
- }
-
- public static TypeDescription createUnion() {
- return new TypeDescription(Category.UNION);
- }
-
- public static TypeDescription createStruct() {
- return new TypeDescription(Category.STRUCT);
- }
-
- /**
- * Add a child to a union type.
- * @param child a new child type to add
- * @return the union type.
- */
- public TypeDescription addUnionChild(TypeDescription child) {
- if (category != Category.UNION) {
- throw new IllegalArgumentException("Can only add types to union type" +
- " and not " + category);
- }
- children.add(child);
- child.parent = this;
- return this;
- }
-
- /**
- * Add a field to a struct type as it is built.
- * @param field the field name
- * @param fieldType the type of the field
- * @return the struct type
- */
- public TypeDescription addField(String field, TypeDescription fieldType) {
- if (category != Category.STRUCT) {
- throw new IllegalArgumentException("Can only add fields to struct type" +
- " and not " + category);
- }
- fieldNames.add(field);
- children.add(fieldType);
- fieldType.parent = this;
- return this;
- }
-
- /**
- * Get the id for this type.
- * The first call will cause all of the ids in the tree to be assigned, so
- * it should not be called before the type is completely built.
- * @return the sequential id
- */
- public int getId() {
- // if the id hasn't been assigned, assign all of the ids from the root
- if (id == -1) {
- TypeDescription root = this;
- while (root.parent != null) {
- root = root.parent;
- }
- root.assignIds(0);
- }
- return id;
- }
-
- public TypeDescription clone() {
- TypeDescription result = new TypeDescription(category);
- result.maxLength = maxLength;
- result.precision = precision;
- result.scale = scale;
- if (fieldNames != null) {
- result.fieldNames.addAll(fieldNames);
- }
- if (children != null) {
- for(TypeDescription child: children) {
- TypeDescription clone = child.clone();
- clone.parent = result;
- result.children.add(clone);
- }
- }
- return result;
- }
-
- @Override
- public int hashCode() {
- long result = category.ordinal() * 4241 + maxLength + precision * 13 + scale;
- if (children != null) {
- for(TypeDescription child: children) {
- result = result * 6959 + child.hashCode();
- }
- }
- return (int) result;
- }
-
- @Override
- public boolean equals(Object other) {
- if (other == null || !(other instanceof TypeDescription)) {
- return false;
- }
- if (other == this) {
- return true;
- }
- TypeDescription castOther = (TypeDescription) other;
- if (category != castOther.category ||
- maxLength != castOther.maxLength ||
- scale != castOther.scale ||
- precision != castOther.precision) {
- return false;
- }
- if (children != null) {
- if (children.size() != castOther.children.size()) {
- return false;
- }
- for (int i = 0; i < children.size(); ++i) {
- if (!children.get(i).equals(castOther.children.get(i))) {
- return false;
- }
- }
- }
- if (category == Category.STRUCT) {
- for(int i=0; i < fieldNames.size(); ++i) {
- if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
- * Get the maximum id assigned to this type or its children.
- * The first call will cause all of the ids in the tree to be assigned, so
- * it should not be called before the type is completely built.
- * @return the maximum id assigned under this type
- */
- public int getMaximumId() {
- // if the id hasn't been assigned, assign all of the ids from the root
- if (maxId == -1) {
- TypeDescription root = this;
- while (root.parent != null) {
- root = root.parent;
- }
- root.assignIds(0);
- }
- return maxId;
- }
-
- private ColumnVector createColumn(int maxSize) {
- switch (category) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case DATE:
- return new LongColumnVector(maxSize);
- case TIMESTAMP:
- return new TimestampColumnVector(maxSize);
- case FLOAT:
- case DOUBLE:
- return new DoubleColumnVector(maxSize);
- case DECIMAL:
- return new DecimalColumnVector(maxSize, precision, scale);
- case STRING:
- case BINARY:
- case CHAR:
- case VARCHAR:
- return new BytesColumnVector(maxSize);
- case STRUCT: {
- ColumnVector[] fieldVector = new ColumnVector[children.size()];
- for(int i=0; i < fieldVector.length; ++i) {
- fieldVector[i] = children.get(i).createColumn(maxSize);
- }
- return new StructColumnVector(maxSize,
- fieldVector);
- }
- case UNION: {
- ColumnVector[] fieldVector = new ColumnVector[children.size()];
- for(int i=0; i < fieldVector.length; ++i) {
- fieldVector[i] = children.get(i).createColumn(maxSize);
- }
- return new UnionColumnVector(maxSize,
- fieldVector);
- }
- case LIST:
- return new ListColumnVector(maxSize,
- children.get(0).createColumn(maxSize));
- case MAP:
- return new MapColumnVector(maxSize,
- children.get(0).createColumn(maxSize),
- children.get(1).createColumn(maxSize));
- default:
- throw new IllegalArgumentException("Unknown type " + category);
- }
- }
-
- public VectorizedRowBatch createRowBatch(int maxSize) {
- VectorizedRowBatch result;
- if (category == Category.STRUCT) {
- result = new VectorizedRowBatch(children.size(), maxSize);
- for(int i=0; i < result.cols.length; ++i) {
- result.cols[i] = children.get(i).createColumn(maxSize);
- }
- } else {
- result = new VectorizedRowBatch(1, maxSize);
- result.cols[0] = createColumn(maxSize);
- }
- result.reset();
- return result;
- }
-
- public VectorizedRowBatch createRowBatch() {
- return createRowBatch(VectorizedRowBatch.DEFAULT_SIZE);
- }
-
- /**
- * Get the kind of this type.
- * @return the category of this type.
- */
- public Category getCategory() {
- return category;
- }
-
- /**
- * Get the maximum length of the type. Only used for char and varchar types.
- * @return the maximum length of the string type
- */
- public int getMaxLength() {
- return maxLength;
- }
-
- /**
- * Get the precision of the decimal type.
- * @return the number of digits for the precision.
- */
- public int getPrecision() {
- return precision;
- }
-
- /**
- * Get the scale of the decimal type.
- * @return the number of digits for the scale.
- */
- public int getScale() {
- return scale;
- }
-
- /**
- * For struct types, get the list of field names.
- * @return the list of field names.
- */
- public List<String> getFieldNames() {
- return Collections.unmodifiableList(fieldNames);
- }
-
- /**
- * Get the subtypes of this type.
- * @return the list of children types
- */
- public List<TypeDescription> getChildren() {
- return children == null ? null : Collections.unmodifiableList(children);
- }
-
- /**
- * Assign ids to all of the nodes under this one.
- * @param startId the lowest id to assign
- * @return the next available id
- */
- private int assignIds(int startId) {
- id = startId++;
- if (children != null) {
- for (TypeDescription child : children) {
- startId = child.assignIds(startId);
- }
- }
- maxId = startId - 1;
- return startId;
- }
-
- private TypeDescription(Category category) {
- this.category = category;
- if (category.isPrimitive) {
- children = null;
- } else {
- children = new ArrayList<>();
- }
- if (category == Category.STRUCT) {
- fieldNames = new ArrayList<>();
- } else {
- fieldNames = null;
- }
- }
-
- private int id = -1;
- private int maxId = -1;
- private TypeDescription parent;
- private final Category category;
- private final List<TypeDescription> children;
- private final List<String> fieldNames;
- private int maxLength = DEFAULT_LENGTH;
- private int precision = DEFAULT_PRECISION;
- private int scale = DEFAULT_SCALE;
-
- public void printToBuffer(StringBuilder buffer) {
- buffer.append(category.name);
- switch (category) {
- case DECIMAL:
- buffer.append('(');
- buffer.append(precision);
- buffer.append(',');
- buffer.append(scale);
- buffer.append(')');
- break;
- case CHAR:
- case VARCHAR:
- buffer.append('(');
- buffer.append(maxLength);
- buffer.append(')');
- break;
- case LIST:
- case MAP:
- case UNION:
- buffer.append('<');
- for(int i=0; i < children.size(); ++i) {
- if (i != 0) {
- buffer.append(',');
- }
- children.get(i).printToBuffer(buffer);
- }
- buffer.append('>');
- break;
- case STRUCT:
- buffer.append('<');
- for(int i=0; i < children.size(); ++i) {
- if (i != 0) {
- buffer.append(',');
- }
- buffer.append(fieldNames.get(i));
- buffer.append(':');
- children.get(i).printToBuffer(buffer);
- }
- buffer.append('>');
- break;
- default:
- break;
- }
- }
-
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- printToBuffer(buffer);
- return buffer.toString();
- }
-
- private void printJsonToBuffer(String prefix, StringBuilder buffer,
- int indent) {
- for(int i=0; i < indent; ++i) {
- buffer.append(' ');
- }
- buffer.append(prefix);
- buffer.append("{\"category\": \"");
- buffer.append(category.name);
- buffer.append("\", \"id\": ");
- buffer.append(getId());
- buffer.append(", \"max\": ");
- buffer.append(maxId);
- switch (category) {
- case DECIMAL:
- buffer.append(", \"precision\": ");
- buffer.append(precision);
- buffer.append(", \"scale\": ");
- buffer.append(scale);
- break;
- case CHAR:
- case VARCHAR:
- buffer.append(", \"length\": ");
- buffer.append(maxLength);
- break;
- case LIST:
- case MAP:
- case UNION:
- buffer.append(", \"children\": [");
- for(int i=0; i < children.size(); ++i) {
- buffer.append('\n');
- children.get(i).printJsonToBuffer("", buffer, indent + 2);
- if (i != children.size() - 1) {
- buffer.append(',');
- }
- }
- buffer.append("]");
- break;
- case STRUCT:
- buffer.append(", \"fields\": [");
- for(int i=0; i < children.size(); ++i) {
- buffer.append('\n');
- children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
- buffer, indent + 2);
- if (i != children.size() - 1) {
- buffer.append(',');
- }
- }
- buffer.append(']');
- break;
- default:
- break;
- }
- buffer.append('}');
- }
-
- public String toJson() {
- StringBuilder buffer = new StringBuilder();
- printJsonToBuffer("", buffer, 0);
- return buffer.toString();
- }
-
- /**
- * Locate a subtype by its id.
- * @param goal the column id to look for
- * @return the subtype
- */
- public TypeDescription findSubtype(int goal) {
- // call getId method to make sure the ids are assigned
- int id = getId();
- if (goal < id || goal > maxId) {
- throw new IllegalArgumentException("Unknown type id " + id + " in " +
- toJson());
- }
- if (goal == id) {
- return this;
- } else {
- TypeDescription prev = null;
- for(TypeDescription next: children) {
- if (next.id > goal) {
- return prev.findSubtype(goal);
- }
- prev = next;
- }
- return prev.findSubtype(goal);
- }
- }}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/Writer.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Writer.java b/orc/src/java/org/apache/orc/Writer.java
deleted file mode 100644
index 4492062..0000000
--- a/orc/src/java/org/apache/orc/Writer.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TypeDescription;
-
-/**
- * The interface for writing ORC files.
- */
-public interface Writer {
-
- /**
- * Get the schema for this writer
- * @return the file schema
- */
- TypeDescription getSchema();
-
- /**
- * Add arbitrary meta-data to the ORC file. This may be called at any point
- * until the Writer is closed. If the same key is passed a second time, the
- * second value will replace the first.
- * @param key a key to label the data with.
- * @param value the contents of the metadata.
- */
- void addUserMetadata(String key, ByteBuffer value);
-
- /**
- * Add a row batch to the ORC file.
- * @param batch the rows to add
- */
- void addRowBatch(VectorizedRowBatch batch) throws IOException;
-
- /**
- * Flush all of the buffers and close the file. No methods on this writer
- * should be called afterwards.
- * @throws IOException
- */
- void close() throws IOException;
-
- /**
- * Return the deserialized data size. Raw data size will be computed when
- * writing the file footer. Hence raw data size value will be available only
- * after closing the writer.
- *
- * @return raw data size
- */
- long getRawDataSize();
-
- /**
- * Return the number of rows in file. Row count gets updated when flushing
- * the stripes. To get accurate row count this method should be called after
- * closing the writer.
- *
- * @return row count
- */
- long getNumberOfRows();
-
- /**
- * Write an intermediate footer on the file such that if the file is
- * truncated to the returned offset, it would be a valid ORC file.
- * @return the offset that would be a valid end location for an ORC file
- */
- long writeIntermediateFooter() throws IOException;
-
- /**
- * Fast stripe append to ORC file. This interface is used for fast ORC file
- * merge with other ORC files. When merging, the file to be merged should pass
- * stripe in binary form along with stripe information and stripe statistics.
- * After appending last stripe of a file, use appendUserMetadata() to append
- * any user metadata.
- * @param stripe - stripe as byte array
- * @param offset - offset within byte array
- * @param length - length of stripe within byte array
- * @param stripeInfo - stripe information
- * @param stripeStatistics - stripe statistics (Protobuf objects can be
- * merged directly)
- * @throws IOException
- */
- public void appendStripe(byte[] stripe, int offset, int length,
- StripeInformation stripeInfo,
- OrcProto.StripeStatistics stripeStatistics) throws IOException;
-
- /**
- * When fast stripe append is used for merging ORC stripes, after appending
- * the last stripe from a file, this interface must be used to merge any
- * user metadata.
- * @param userMetadata - user metadata
- */
- public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/AcidStats.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/AcidStats.java b/orc/src/java/org/apache/orc/impl/AcidStats.java
deleted file mode 100644
index 6657fe9..0000000
--- a/orc/src/java/org/apache/orc/impl/AcidStats.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-/**
- * Counters for the ACID operations (inserts, updates and deletes) in an ORC
- * file, together with a comma-separated text form.
- */
-public class AcidStats {
-  public long inserts;
-  public long updates;
-  public long deletes;
-
-  /** Creates statistics with every counter at zero. */
-  public AcidStats() {
-    inserts = updates = deletes = 0;
-  }
-
-  /**
-   * Parses the text form produced by {@link #serialize()}.
-   *
-   * @param serialized counters formatted as {@code inserts,updates,deletes}
-   */
-  public AcidStats(String serialized) {
-    final String[] fields = serialized.split(",");
-    this.inserts = Long.parseLong(fields[0]);
-    this.updates = Long.parseLong(fields[1]);
-    this.deletes = Long.parseLong(fields[2]);
-  }
-
-  /**
-   * @return the counters formatted as {@code inserts,updates,deletes}
-   */
-  public String serialize() {
-    return inserts + "," + updates + "," + deletes;
-  }
-
-  @Override
-  public String toString() {
-    return " inserts: " + inserts + " updates: " + updates + " deletes: " + deletes;
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
deleted file mode 100644
index dda7355..0000000
--- a/orc/src/java/org/apache/orc/impl/BitFieldReader.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.impl.PositionProvider;
-import org.apache.orc.impl.RunLengthByteReader;
-
-/**
- * Reads fixed-width bit fields from a byte run-length encoded stream. Within
- * each byte the most significant bits are consumed first.
- */
-public class BitFieldReader {
-  private final RunLengthByteReader input;
-  /** The number of bits in one item. Non-test code always uses 1. */
-  private final int bitSize;
-  /** The byte being consumed, stored masked to the range 0..255. */
-  private int current;
-  /** Number of low-order bits of {@code current} that are still unread. */
-  private int bitsLeft;
-  /** Mask selecting the low {@code bitSize} bits of a result. */
-  private final int mask;
-
-  public BitFieldReader(InStream input,
-      int bitSize) throws IOException {
-    this.input = new RunLengthByteReader(input);
-    this.bitSize = bitSize;
-    mask = (1 << bitSize) - 1;
-  }
-
-  public void setInStream(InStream inStream) {
-    this.input.setInStream(inStream);
-  }
-
-  /**
-   * Loads the next byte of the underlying stream as an unsigned value and
-   * marks all 8 of its bits as unread.
-   *
-   * @throws EOFException if the underlying stream has no more bytes
-   */
-  private void readByte() throws IOException {
-    if (input.hasNext()) {
-      current = 0xff & input.next();
-      bitsLeft = 8;
-    } else {
-      throw new EOFException("Read past end of bit field from " + this);
-    }
-  }
-
-  /**
-   * Reads the next item of {@code bitSize} bits, pulling in further bytes
-   * when the item straddles a byte boundary.
-   *
-   * @return the item value, masked to {@code bitSize} bits
-   */
-  public int next() throws IOException {
-    int result = 0;
-    int bitsLeftToRead = bitSize;
-    while (bitsLeftToRead > bitsLeft) {
-      result <<= bitsLeft;
-      result |= current & ((1 << bitsLeft) - 1);
-      bitsLeftToRead -= bitsLeft;
-      readByte();
-    }
-    if (bitsLeftToRead > 0) {
-      result <<= bitsLeftToRead;
-      bitsLeft -= bitsLeftToRead;
-      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-    }
-    return result & mask;
-  }
-
-  /**
-   * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
-   * to figure out whether we have a run. Given that runs in booleans are likely it's worth it.
-   * However it means we'd need to keep track of how many bytes we read, and next/nextVector won't
-   * work anymore once this is called. This is trivial to fix, but these are never interspersed.
-   */
-  private boolean lastRunValue;
-  private int lastRunLength = -1;
-  private void readNextRun(int maxRunLength) throws IOException {
-    assert bitSize == 1;
-    if (lastRunLength > 0) return; // last run is not exhausted yet
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    // First take care of the partial bits.
-    boolean hasVal = false;
-    int runLength = 0;
-    if (bitsLeft != 8) {
-      int partialBitsMask = (1 << bitsLeft) - 1;
-      int partialBits = current & partialBitsMask;
-      if (partialBits == partialBitsMask || partialBits == 0) {
-        lastRunValue = (partialBits == partialBitsMask);
-        if (maxRunLength <= bitsLeft) {
-          lastRunLength = maxRunLength;
-          return;
-        }
-        maxRunLength -= bitsLeft;
-        hasVal = true;
-        runLength = bitsLeft;
-        bitsLeft = 0;
-      } else {
-        // There's no run in partial bits. Return whatever we have.
-        // NOTE(review): runLength counts leading zero bits here, yet
-        // lastRunValue becomes true when that count is positive; the branch
-        // above uses true for a run of ones. Confirm before relying on this.
-        int prefixBitsCount = 32 - bitsLeft;
-        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
-        lastRunValue = (runLength > 0);
-        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
-          (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
-        return;
-      }
-      assert bitsLeft == 0;
-      readByte();
-    }
-    if (!hasVal) {
-      lastRunValue = ((current >> 7) == 1);
-      hasVal = true;
-    }
-    // Read full bytes until the run ends.
-    assert bitsLeft == 8;
-    while (maxRunLength >= 8
-        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
-      runLength += 8;
-      maxRunLength -= 8;
-      readByte();
-    }
-    if (maxRunLength > 0) {
-      int extraBits = Integer.numberOfLeadingZeros(
-          lastRunValue ? (~(current | ~255)) : current) - 24;
-      bitsLeft -= extraBits;
-      runLength += extraBits;
-    }
-    lastRunLength = runLength;
-  }
-
-  /**
-   * Fills the first {@code previousLen} entries of the vector. Null entries
-   * consume no bits and are set to 1; {@code isRepeating} stays true only
-   * while every entry matches entry 0 in both value and null flag.
-   */
-  public void nextVector(LongColumnVector previous,
-      long previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (previous.noNulls || !previous.isNull[i]) {
-        previous.vector[i] = next();
-      } else {
-        // The default value of null for int types in vectorized
-        // processing is 1, so set that if the value is null
-        previous.vector[i] = 1;
-      }
-
-      // The default value for nulls in Vectorization for int types is 1
-      // and given that non null value can also be 1, we need to check for isNull also
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && ((previous.vector[0] != previous.vector[i]) ||
-          (previous.isNull[0] != previous.isNull[i]))) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  /**
-   * Repositions the reader: first the underlying byte stream, then the
-   * number of bits already consumed within the byte at that position.
-   *
-   * @throws IllegalArgumentException if the recorded bit offset exceeds 8
-   */
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed > 8) {
-      throw new IllegalArgumentException("Seek past end of byte at " +
-          consumed + " in " + input);
-    } else if (consumed != 0) {
-      readByte();
-      bitsLeft = 8 - consumed;
-    } else {
-      bitsLeft = 0;
-    }
-  }
-
-  /**
-   * Skips {@code items} items of {@code bitSize} bits each.
-   *
-   * @param items the number of items to skip
-   */
-  public void skip(long items) throws IOException {
-    long totalBits = bitSize * items;
-    if (bitsLeft >= totalBits) {
-      bitsLeft -= totalBits;
-      return;
-    }
-    totalBits -= bitsLeft;
-    input.skip(totalBits / 8);
-    int partialBits = (int) (totalBits % 8);
-    if (partialBits == 0) {
-      // The skip ends exactly on a byte boundary. Do not read ahead: the next
-      // byte may not exist (skipping to the end of the stream), and every
-      // reading method already loads a byte lazily when bitsLeft is 0.
-      bitsLeft = 0;
-    } else {
-      // Mask like readByte() does so that current is never sign-extended.
-      current = 0xff & input.next();
-      bitsLeft = 8 - partialBits;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "bit reader current: " + current + " bits left: " + bitsLeft +
-        " bit size: " + bitSize + " from " + input;
-  }
-
-  /** @return true when the reader sits on a byte boundary */
-  boolean hasFullByte() {
-    return bitsLeft == 8 || bitsLeft == 0;
-  }
-
-  /** Returns the next unread bit without consuming it. */
-  int peekOneBit() throws IOException {
-    assert bitSize == 1;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return (current >>> (bitsLeft - 1)) & 1;
-  }
-
-  /** Returns the next whole byte without consuming it; requires a byte boundary. */
-  int peekFullByte() throws IOException {
-    assert bitSize == 1;
-    assert bitsLeft == 8 || bitsLeft == 0;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return current;
-  }
-
-  /** Consumes {@code bits} bits of the byte that is currently loaded. */
-  void skipInCurrentByte(int bits) throws IOException {
-    assert bitsLeft >= bits;
-    bitsLeft -= bits;
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
deleted file mode 100644
index aa5f886..0000000
--- a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.orc.impl.PositionRecorder;
-import org.apache.orc.impl.PositionedOutputStream;
-import org.apache.orc.impl.RunLengthByteWriter;
-
-import java.io.IOException;
-
-/**
- * Packs fixed-width values into bytes, most significant bits first, and hands
- * each completed byte to a {@link RunLengthByteWriter}.
- */
-public class BitFieldWriter {
-  private RunLengthByteWriter output;
-  private final int bitSize;
-  /** The byte currently being assembled. */
-  private byte current = 0;
-  /** Number of bits still free in {@code current}. */
-  private int bitsLeft = 8;
-
-  public BitFieldWriter(PositionedOutputStream output,
-      int bitSize) throws IOException {
-    this.output = new RunLengthByteWriter(output);
-    this.bitSize = bitSize;
-  }
-
-  /** Emits the byte being assembled and starts a fresh, empty one. */
-  private void writeByte() throws IOException {
-    output.write(current);
-    current = 0;
-    bitsLeft = 8;
-  }
-
-  /** Emits any partially filled byte, then flushes the underlying writer. */
-  public void flush() throws IOException {
-    boolean hasPartialByte = bitsLeft != 8;
-    if (hasPartialByte) {
-      writeByte();
-    }
-    output.flush();
-  }
-
-  /**
-   * Appends the low {@code bitSize} bits of {@code value}.
-   */
-  public void write(int value) throws IOException {
-    int pending = bitSize;
-    int bits = value;
-    while (pending > bitsLeft) {
-      // the top of the pending bits fills the bottom of the current byte
-      current |= bits >>> (pending - bitsLeft);
-      // those bits are now accounted for
-      pending -= bitsLeft;
-      // keep only the bits that still have to be written
-      bits &= (1 << pending) - 1;
-      writeByte();
-    }
-    bitsLeft -= pending;
-    current |= bits << bitsLeft;
-    if (bitsLeft == 0) {
-      writeByte();
-    }
-  }
-
-  /**
-   * Records the position of the underlying writer followed by the number of
-   * bits already used in the byte being assembled.
-   */
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    int usedBits = 8 - bitsLeft;
-    recorder.addPosition(usedBits);
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BufferChunk.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BufferChunk.java b/orc/src/java/org/apache/orc/impl/BufferChunk.java
deleted file mode 100644
index da43b96..0000000
--- a/orc/src/java/org/apache/orc/impl/BufferChunk.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package org.apache.orc.impl;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.nio.ByteBuffer;
-
-/**
- * The sections of stripe that we have read.
- * This might not match diskRange - 1 disk range can be multiple buffer chunks,
- * depending on DFS block boundaries.
- */
-public class BufferChunk extends DiskRangeList {
-
- private static final Logger LOG =
- LoggerFactory.getLogger(BufferChunk.class);
- final ByteBuffer chunk;
-
- public BufferChunk(ByteBuffer chunk, long offset) {
- super(offset, offset + chunk.remaining());
- this.chunk = chunk;
- }
-
- public ByteBuffer getChunk() {
- return chunk;
- }
-
- @Override
- public boolean hasData() {
- return chunk != null;
- }
-
- @Override
- public final String toString() {
- boolean makesSense = chunk.remaining() == (end - offset);
- return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
- + (makesSense ? "" : "(!)") + " type: " +
- (chunk.isDirect() ? "direct" : "array-backed");
- }
-
- @Override
- public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
- assert offset <= end && offset >= this.offset && end <= this.end;
- assert offset + shiftBy >= 0;
- ByteBuffer sliceBuf = chunk.slice();
- int newPos = (int) (offset - this.offset);
- int newLimit = newPos + (int) (end - offset);
- try {
- sliceBuf.position(newPos);
- sliceBuf.limit(newLimit);
- } catch (Throwable t) {
- LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
- + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
- + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
- + t.getClass());
- throw new RuntimeException(t);
- }
- return new BufferChunk(sliceBuf, offset + shiftBy);
- }
-
- @Override
- public ByteBuffer getData() {
- return chunk;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
deleted file mode 100644
index 1118c5c..0000000
--- a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1101 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.orc.BinaryColumnStatistics;
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.DateColumnStatistics;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.TimestampColumnStatistics;
-import org.apache.orc.TypeDescription;
-
-public class ColumnStatisticsImpl implements ColumnStatistics {
-
- /**
-  * Boolean column statistics: tracks how many values were true.
-  */
- private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
-     implements BooleanColumnStatistics {
-   private long trueCount = 0;
-
-   /** Restores the statistics from their serialized form. */
-   BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
-     super(stats);
-     // the first bucket holds the number of true values
-     trueCount = stats.getBucketStatistics().getCount(0);
-   }
-
-   BooleanStatisticsImpl() {
-   }
-
-   @Override
-   public void reset() {
-     super.reset();
-     trueCount = 0;
-   }
-
-   @Override
-   public void updateBoolean(boolean value, int repetitions) {
-     if (!value) {
-       return;
-     }
-     trueCount += repetitions;
-   }
-
-   @Override
-   public void merge(ColumnStatisticsImpl other) {
-     if (other instanceof BooleanStatisticsImpl) {
-       trueCount += ((BooleanStatisticsImpl) other).trueCount;
-     } else if (isStatsExists() && trueCount != 0) {
-       throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
-     }
-     super.merge(other);
-   }
-
-   @Override
-   public OrcProto.ColumnStatistics.Builder serialize() {
-     OrcProto.ColumnStatistics.Builder result = super.serialize();
-     OrcProto.BucketStatistics.Builder buckets =
-         OrcProto.BucketStatistics.newBuilder();
-     buckets.addCount(trueCount);
-     result.setBucketStatistics(buckets);
-     return result;
-   }
-
-   @Override
-   public long getFalseCount() {
-     // every counted value that is not true is false
-     return getNumberOfValues() - trueCount;
-   }
-
-   @Override
-   public long getTrueCount() {
-     return trueCount;
-   }
-
-   @Override
-   public String toString() {
-     StringBuilder buf = new StringBuilder(super.toString());
-     buf.append(" true: ").append(trueCount);
-     return buf.toString();
-   }
- }
-
- private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
- implements IntegerColumnStatistics {
-
- private long minimum = Long.MAX_VALUE;
- private long maximum = Long.MIN_VALUE;
- private long sum = 0;
- private boolean hasMinimum = false;
- private boolean overflow = false;
-
- IntegerStatisticsImpl() {
- }
-
- IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
- if (intStat.hasMinimum()) {
- hasMinimum = true;
- minimum = intStat.getMinimum();
- }
- if (intStat.hasMaximum()) {
- maximum = intStat.getMaximum();
- }
- if (intStat.hasSum()) {
- sum = intStat.getSum();
- } else {
- overflow = true;
- }
- }
-
- @Override
- public void reset() {
- super.reset();
- hasMinimum = false;
- minimum = Long.MAX_VALUE;
- maximum = Long.MIN_VALUE;
- sum = 0;
- overflow = false;
- }
-
- @Override
- public void updateInteger(long value, int repetitions) {
- if (!hasMinimum) {
- hasMinimum = true;
- minimum = value;
- maximum = value;
- } else if (value < minimum) {
- minimum = value;
- } else if (value > maximum) {
- maximum = value;
- }
- if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += value * repetitions;
- if ((value >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
- }
- }
- }
-
- @Override
- public void merge(ColumnStatisticsImpl other) {
- if (other instanceof IntegerStatisticsImpl) {
- IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
- if (!hasMinimum) {
- hasMinimum = otherInt.hasMinimum;
- minimum = otherInt.minimum;
- maximum = otherInt.maximum;
- } else if (otherInt.hasMinimum) {
- if (otherInt.minimum < minimum) {
- minimum = otherInt.minimum;
- }
- if (otherInt.maximum > maximum) {
- maximum = otherInt.maximum;
- }
- }
-
- overflow |= otherInt.overflow;
- if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += otherInt.sum;
- if ((otherInt.sum >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
- }
- }
- } else {
- if (isStatsExists() && hasMinimum) {
- throw new IllegalArgumentException("Incompatible merging of integer column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- public OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder builder = super.serialize();
- OrcProto.IntegerStatistics.Builder intb =
- OrcProto.IntegerStatistics.newBuilder();
- if (hasMinimum) {
- intb.setMinimum(minimum);
- intb.setMaximum(maximum);
- }
- if (!overflow) {
- intb.setSum(sum);
- }
- builder.setIntStatistics(intb);
- return builder;
- }
-
- @Override
- public long getMinimum() {
- return minimum;
- }
-
- @Override
- public long getMaximum() {
- return maximum;
- }
-
- @Override
- public boolean isSumDefined() {
- return !overflow;
- }
-
- @Override
- public long getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (hasMinimum) {
- buf.append(" min: ");
- buf.append(minimum);
- buf.append(" max: ");
- buf.append(maximum);
- }
- if (!overflow) {
- buf.append(" sum: ");
- buf.append(sum);
- }
- return buf.toString();
- }
- }
-
- /**
-  * Double column statistics: minimum, maximum and sum.
-  */
- private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
-     implements DoubleColumnStatistics {
-   private boolean hasMinimum = false;
-   private double minimum = Double.MAX_VALUE;
-   // Double.MIN_VALUE is the smallest POSITIVE double, so it is not a valid
-   // "no maximum yet" sentinel; use the most negative finite value instead.
-   private double maximum = -Double.MAX_VALUE;
-   private double sum = 0;
-
-   DoubleStatisticsImpl() {
-   }
-
-   /** Restores the statistics from their serialized form. */
-   DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
-     super(stats);
-     OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
-     if (dbl.hasMinimum()) {
-       hasMinimum = true;
-       minimum = dbl.getMinimum();
-     }
-     if (dbl.hasMaximum()) {
-       maximum = dbl.getMaximum();
-     }
-     if (dbl.hasSum()) {
-       sum = dbl.getSum();
-     }
-   }
-
-   @Override
-   public void reset() {
-     super.reset();
-     hasMinimum = false;
-     minimum = Double.MAX_VALUE;
-     maximum = -Double.MAX_VALUE;
-     sum = 0;
-   }
-
-   @Override
-   public void updateDouble(double value) {
-     if (!hasMinimum) {
-       // the first value is both the minimum and the maximum
-       hasMinimum = true;
-       minimum = value;
-       maximum = value;
-     } else if (value < minimum) {
-       minimum = value;
-     } else if (value > maximum) {
-       maximum = value;
-     }
-     sum += value;
-   }
-
-   @Override
-   public void merge(ColumnStatisticsImpl other) {
-     if (other instanceof DoubleStatisticsImpl) {
-       DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
-       if (!hasMinimum) {
-         hasMinimum = dbl.hasMinimum;
-         minimum = dbl.minimum;
-         maximum = dbl.maximum;
-       } else if (dbl.hasMinimum) {
-         if (dbl.minimum < minimum) {
-           minimum = dbl.minimum;
-         }
-         if (dbl.maximum > maximum) {
-           maximum = dbl.maximum;
-         }
-       }
-       sum += dbl.sum;
-     } else {
-       if (isStatsExists() && hasMinimum) {
-         throw new IllegalArgumentException("Incompatible merging of double column statistics");
-       }
-     }
-     super.merge(other);
-   }
-
-   @Override
-   public OrcProto.ColumnStatistics.Builder serialize() {
-     OrcProto.ColumnStatistics.Builder builder = super.serialize();
-     OrcProto.DoubleStatistics.Builder dbl =
-         OrcProto.DoubleStatistics.newBuilder();
-     if (hasMinimum) {
-       dbl.setMinimum(minimum);
-       dbl.setMaximum(maximum);
-     }
-     dbl.setSum(sum);
-     builder.setDoubleStatistics(dbl);
-     return builder;
-   }
-
-   @Override
-   public double getMinimum() {
-     return minimum;
-   }
-
-   @Override
-   public double getMaximum() {
-     return maximum;
-   }
-
-   @Override
-   public double getSum() {
-     return sum;
-   }
-
-   @Override
-   public String toString() {
-     StringBuilder buf = new StringBuilder(super.toString());
-     if (hasMinimum) {
-       buf.append(" min: ");
-       buf.append(minimum);
-       buf.append(" max: ");
-       buf.append(maximum);
-     }
-     buf.append(" sum: ");
-     buf.append(sum);
-     return buf.toString();
-   }
- }
-
- protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
- implements StringColumnStatistics {
- private Text minimum = null;
- private Text maximum = null;
- private long sum = 0;
-
- StringStatisticsImpl() {
- }
-
- StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.StringStatistics str = stats.getStringStatistics();
- if (str.hasMaximum()) {
- maximum = new Text(str.getMaximum());
- }
- if (str.hasMinimum()) {
- minimum = new Text(str.getMinimum());
- }
- if(str.hasSum()) {
- sum = str.getSum();
- }
- }
-
- @Override
- public void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- sum = 0;
- }
-
- @Override
- public void updateString(Text value) {
- if (minimum == null) {
- maximum = minimum = new Text(value);
- } else if (minimum.compareTo(value) > 0) {
- minimum = new Text(value);
- } else if (maximum.compareTo(value) < 0) {
- maximum = new Text(value);
- }
- sum += value.getLength();
- }
-
- @Override
- public void updateString(byte[] bytes, int offset, int length,
- int repetitions) {
- if (minimum == null) {
- maximum = minimum = new Text();
- maximum.set(bytes, offset, length);
- } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
- minimum.getLength(), bytes, offset, length) > 0) {
- minimum = new Text();
- minimum.set(bytes, offset, length);
- } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
- maximum.getLength(), bytes, offset, length) < 0) {
- maximum = new Text();
- maximum.set(bytes, offset, length);
- }
- sum += length * repetitions;
- }
-
- @Override
- public void merge(ColumnStatisticsImpl other) {
- if (other instanceof StringStatisticsImpl) {
- StringStatisticsImpl str = (StringStatisticsImpl) other;
- if (minimum == null) {
- if (str.minimum != null) {
- maximum = new Text(str.getMaximum());
- minimum = new Text(str.getMinimum());
- } else {
- /* both are empty */
- maximum = minimum = null;
- }
- } else if (str.minimum != null) {
- if (minimum.compareTo(str.minimum) > 0) {
- minimum = new Text(str.getMinimum());
- }
- if (maximum.compareTo(str.maximum) < 0) {
- maximum = new Text(str.getMaximum());
- }
- }
- sum += str.sum;
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of string column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- public OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.StringStatistics.Builder str =
- OrcProto.StringStatistics.newBuilder();
- if (getNumberOfValues() != 0) {
- str.setMinimum(getMinimum());
- str.setMaximum(getMaximum());
- str.setSum(sum);
- }
- result.setStringStatistics(str);
- return result;
- }
-
- @Override
- public String getMinimum() {
- return minimum == null ? null : minimum.toString();
- }
-
- @Override
- public String getMaximum() {
- return maximum == null ? null : maximum.toString();
- }
-
- @Override
- public long getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- buf.append(" sum: ");
- buf.append(sum);
- }
- return buf.toString();
- }
- }
-
- /**
-  * Binary column statistics: the total length of all values.
-  */
- protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
-     BinaryColumnStatistics {
-
-   private long sum = 0;
-
-   BinaryStatisticsImpl() {
-   }
-
-   /** Restores the statistics from their serialized form. */
-   BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
-     super(stats);
-     OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
-     if (binStats.hasSum()) {
-       sum = binStats.getSum();
-     }
-   }
-
-   @Override
-   public void reset() {
-     super.reset();
-     sum = 0;
-   }
-
-   @Override
-   public void updateBinary(BytesWritable value) {
-     sum += value.getLength();
-   }
-
-   /**
-    * Records {@code repetitions} occurrences of a value of {@code length} bytes.
-    */
-   @Override
-   public void updateBinary(byte[] bytes, int offset, int length,
-       int repetitions) {
-     // widen before multiplying: int * int would wrap before reaching the long sum
-     sum += (long) length * repetitions;
-   }
-
-   @Override
-   public void merge(ColumnStatisticsImpl other) {
-     // Test against the class that is actually cast to, as the sibling
-     // statistics classes do.
-     if (other instanceof BinaryStatisticsImpl) {
-       BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
-       sum += bin.sum;
-     } else {
-       if (isStatsExists() && sum != 0) {
-         throw new IllegalArgumentException("Incompatible merging of binary column statistics");
-       }
-     }
-     super.merge(other);
-   }
-
-   @Override
-   public long getSum() {
-     return sum;
-   }
-
-   @Override
-   public OrcProto.ColumnStatistics.Builder serialize() {
-     OrcProto.ColumnStatistics.Builder result = super.serialize();
-     OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
-     bin.setSum(sum);
-     result.setBinaryStatistics(bin);
-     return result;
-   }
-
-   @Override
-   public String toString() {
-     StringBuilder buf = new StringBuilder(super.toString());
-     if (getNumberOfValues() != 0) {
-       buf.append(" sum: ");
-       buf.append(sum);
-     }
-     return buf.toString();
-   }
- }
-
- /**
-  * Decimal column statistics: minimum, maximum and sum. A null sum means
-  * the sum is not available.
-  */
- private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
-     implements DecimalColumnStatistics {
-
-   // These objects are mutable for better performance.
-   private HiveDecimalWritable minimum = null;
-   private HiveDecimalWritable maximum = null;
-   private HiveDecimalWritable sum = new HiveDecimalWritable(0);
-
-   DecimalStatisticsImpl() {
-   }
-
-   /** Restores the statistics from their serialized form. */
-   DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
-     super(stats);
-     OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
-     if (dec.hasMaximum()) {
-       maximum = new HiveDecimalWritable(dec.getMaximum());
-     }
-     if (dec.hasMinimum()) {
-       minimum = new HiveDecimalWritable(dec.getMinimum());
-     }
-     if (dec.hasSum()) {
-       sum = new HiveDecimalWritable(dec.getSum());
-     } else {
-       sum = null;
-     }
-   }
-
-   /** Returns an independent copy of {@code value}, or null for null. */
-   private static HiveDecimalWritable copyOf(HiveDecimalWritable value) {
-     return value == null ? null : new HiveDecimalWritable(value);
-   }
-
-   @Override
-   public void reset() {
-     super.reset();
-     minimum = null;
-     maximum = null;
-     sum = new HiveDecimalWritable(0);
-   }
-
-   @Override
-   public void updateDecimal(HiveDecimalWritable value) {
-     if (minimum == null) {
-       minimum = new HiveDecimalWritable(value);
-       maximum = new HiveDecimalWritable(value);
-     } else if (minimum.compareTo(value) > 0) {
-       minimum.set(value);
-     } else if (maximum.compareTo(value) < 0) {
-       maximum.set(value);
-     }
-     if (sum != null) {
-       sum.mutateAdd(value);
-     }
-   }
-
-   @Override
-   public void merge(ColumnStatisticsImpl other) {
-     if (other instanceof DecimalStatisticsImpl) {
-       DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
-       if (minimum == null) {
-         minimum = copyOf(dec.minimum);
-         maximum = copyOf(dec.maximum);
-         // Copy instead of sharing: the sum is mutated in place by
-         // mutateAdd, so an alias would also change the other object's sum.
-         sum = copyOf(dec.sum);
-       } else if (dec.minimum != null) {
-         if (minimum.compareTo(dec.minimum) > 0) {
-           minimum.set(dec.minimum);
-         }
-         if (maximum.compareTo(dec.maximum) < 0) {
-           maximum.set(dec.maximum);
-         }
-         if (sum == null || dec.sum == null) {
-           sum = null;
-         } else {
-           sum.mutateAdd(dec.sum);
-         }
-       }
-     } else {
-       if (isStatsExists() && minimum != null) {
-         throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
-       }
-     }
-     super.merge(other);
-   }
-
-   @Override
-   public OrcProto.ColumnStatistics.Builder serialize() {
-     OrcProto.ColumnStatistics.Builder result = super.serialize();
-     OrcProto.DecimalStatistics.Builder dec =
-         OrcProto.DecimalStatistics.newBuilder();
-     if (getNumberOfValues() != 0 && minimum != null) {
-       dec.setMinimum(minimum.toString());
-       dec.setMaximum(maximum.toString());
-     }
-     // Check isSet for overflow.
-     if (sum != null && sum.isSet()) {
-       dec.setSum(sum.toString());
-     }
-     result.setDecimalStatistics(dec);
-     return result;
-   }
-
-   /** @return the minimum, or null when none has been recorded */
-   @Override
-   public HiveDecimal getMinimum() {
-     return minimum == null ? null : minimum.getHiveDecimal();
-   }
-
-   /** @return the maximum, or null when none has been recorded */
-   @Override
-   public HiveDecimal getMaximum() {
-     return maximum == null ? null : maximum.getHiveDecimal();
-   }
-
-   /** @return the sum, or null when it is not available */
-   @Override
-   public HiveDecimal getSum() {
-     return sum == null ? null : sum.getHiveDecimal();
-   }
-
-   @Override
-   public String toString() {
-     StringBuilder buf = new StringBuilder(super.toString());
-     if (getNumberOfValues() != 0) {
-       buf.append(" min: ");
-       buf.append(minimum);
-       buf.append(" max: ");
-       buf.append(maximum);
-       if (sum != null) {
-         buf.append(" sum: ");
-         buf.append(sum);
-       }
-     }
-     return buf.toString();
-   }
- }
-
- private static final class DateStatisticsImpl extends ColumnStatisticsImpl
- implements DateColumnStatistics {
- private Integer minimum = null;
- private Integer maximum = null;
-
- DateStatisticsImpl() {
- }
-
- DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.DateStatistics dateStats = stats.getDateStatistics();
- // min,max values serialized/deserialized as int (days since epoch)
- if (dateStats.hasMaximum()) {
- maximum = dateStats.getMaximum();
- }
- if (dateStats.hasMinimum()) {
- minimum = dateStats.getMinimum();
- }
- }
-
- @Override
- public void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- }
-
- @Override
- public void updateDate(DateWritable value) {
- if (minimum == null) {
- minimum = value.getDays();
- maximum = value.getDays();
- } else if (minimum > value.getDays()) {
- minimum = value.getDays();
- } else if (maximum < value.getDays()) {
- maximum = value.getDays();
- }
- }
-
- @Override
- public void updateDate(int value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum > value) {
- minimum = value;
- } else if (maximum < value) {
- maximum = value;
- }
- }
-
- @Override
- public void merge(ColumnStatisticsImpl other) {
- if (other instanceof DateStatisticsImpl) {
- DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
- if (minimum == null) {
- minimum = dateStats.minimum;
- maximum = dateStats.maximum;
- } else if (dateStats.minimum != null) {
- if (minimum > dateStats.minimum) {
- minimum = dateStats.minimum;
- }
- if (maximum < dateStats.maximum) {
- maximum = dateStats.maximum;
- }
- }
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of date column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- public OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.DateStatistics.Builder dateStats =
- OrcProto.DateStatistics.newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
- dateStats.setMinimum(minimum);
- dateStats.setMaximum(maximum);
- }
- result.setDateStatistics(dateStats);
- return result;
- }
-
- private transient final DateWritable minDate = new DateWritable();
- private transient final DateWritable maxDate = new DateWritable();
-
- @Override
- public Date getMinimum() {
- if (minimum == null) {
- return null;
- }
- minDate.set(minimum);
- return minDate.get();
- }
-
- @Override
- public Date getMaximum() {
- if (maximum == null) {
- return null;
- }
- maxDate.set(maximum);
- return maxDate.get();
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- }
- return buf.toString();
- }
- }
-
- private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
- implements TimestampColumnStatistics {
- private Long minimum = null;
- private Long maximum = null;
-
- TimestampStatisticsImpl() {
- }
-
- TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
- // min,max values serialized/deserialized as int (milliseconds since epoch)
- if (timestampStats.hasMaximum()) {
- maximum = timestampStats.getMaximum();
- }
- if (timestampStats.hasMinimum()) {
- minimum = timestampStats.getMinimum();
- }
- }
-
- @Override
- public void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- }
-
- @Override
- public void updateTimestamp(Timestamp value) {
- if (minimum == null) {
- minimum = value.getTime();
- maximum = value.getTime();
- } else if (minimum > value.getTime()) {
- minimum = value.getTime();
- } else if (maximum < value.getTime()) {
- maximum = value.getTime();
- }
- }
-
- @Override
- public void updateTimestamp(long value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum > value) {
- minimum = value;
- } else if (maximum < value) {
- maximum = value;
- }
- }
-
- @Override
- public void merge(ColumnStatisticsImpl other) {
- if (other instanceof TimestampStatisticsImpl) {
- TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
- if (minimum == null) {
- minimum = timestampStats.minimum;
- maximum = timestampStats.maximum;
- } else if (timestampStats.minimum != null) {
- if (minimum > timestampStats.minimum) {
- minimum = timestampStats.minimum;
- }
- if (maximum < timestampStats.maximum) {
- maximum = timestampStats.maximum;
- }
- }
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- public OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
- .newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
- timestampStats.setMinimum(minimum);
- timestampStats.setMaximum(maximum);
- }
- result.setTimestampStatistics(timestampStats);
- return result;
- }
-
- @Override
- public Timestamp getMinimum() {
- return minimum == null ? null : new Timestamp(minimum);
- }
-
- @Override
- public Timestamp getMaximum() {
- return maximum == null ? null : new Timestamp(maximum);
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- }
- return buf.toString();
- }
- }
-
- // Running total maintained by increment() and merge().
- private long count = 0;
- // True once setNull() was called or a merged/deserialized source said so.
- private boolean hasNull = false;
-
- /**
-  * Restores the generic counters from their serialized form.
-  *
-  * @param stats the serialized column statistics
-  */
- ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
-   if (stats.hasNumberOfValues()) {
-     count = stats.getNumberOfValues();
-   }
-
-   // When the flag was never written, treat the column as having nulls.
-   hasNull = !stats.hasHasNull() || stats.getHasNull();
- }
-
- /** Creates empty statistics: zero count and no nulls seen. */
- ColumnStatisticsImpl() {
- }
-
- /** Adds one to the running count. */
- public void increment() {
-   count++;
- }
-
- /**
-  * Adds the given amount to the running count.
-  *
-  * @param count the amount to add
-  */
- public void increment(int count) {
-   this.count = this.count + count;
- }
-
- /** Marks the column as containing at least one null. */
- public void setNull() {
-   this.hasNull = true;
- }
-
- // Default update hooks. The base class rejects every typed update; the
- // type-specific statistics classes in this file override the ones they support.
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateBoolean(boolean value, int repetitions) {
-   throw new UnsupportedOperationException("Can't update boolean");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateInteger(long value, int repetitions) {
-   throw new UnsupportedOperationException("Can't update integer");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateDouble(double value) {
-   throw new UnsupportedOperationException("Can't update double");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateString(Text value) {
-   throw new UnsupportedOperationException("Can't update string");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateString(byte[] bytes, int offset, int length,
-     int repetitions) {
-   throw new UnsupportedOperationException("Can't update string");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateBinary(BytesWritable value) {
-   throw new UnsupportedOperationException("Can't update binary");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateBinary(byte[] bytes, int offset, int length,
-     int repetitions) {
-   // The message previously said "string", copied from updateString above.
-   throw new UnsupportedOperationException("Can't update binary");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateDecimal(HiveDecimalWritable value) {
-   throw new UnsupportedOperationException("Can't update decimal");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateDate(DateWritable value) {
-   throw new UnsupportedOperationException("Can't update date");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateDate(int value) {
-   throw new UnsupportedOperationException("Can't update date");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateTimestamp(Timestamp value) {
-   throw new UnsupportedOperationException("Can't update timestamp");
- }
-
- /** @throws UnsupportedOperationException always, in this base implementation */
- public void updateTimestamp(long value) {
-   throw new UnsupportedOperationException("Can't update timestamp");
- }
-
- /**
-  * Tells whether anything has been recorded.
-  *
-  * @return true when the count is positive or a null has been flagged
-  */
- public boolean isStatsExists() {
-   return hasNull || count > 0;
- }
-
- /**
-  * Adds the other statistics' count to this one and ORs the null flags.
-  *
-  * @param stats the statistics to merge into this instance
-  */
- public void merge(ColumnStatisticsImpl stats) {
-   count = count + stats.count;
-   hasNull = hasNull || stats.hasNull;
- }
-
- /** Returns the generic counters to their initial state. */
- public void reset() {
-   hasNull = false;
-   count = 0;
- }
-
- /** @return the running count held by these statistics */
- @Override
- public long getNumberOfValues() {
-   return this.count;
- }
-
- /** @return the current value of the null flag */
- @Override
- public boolean hasNull() {
-   return this.hasNull;
- }
-
- @Override
- public String toString() {
- return "count: " + count + " hasNull: " + hasNull;
- }
-
- /**
-  * Starts a protobuf builder carrying the count and the null flag.
-  *
-  * @return a new builder with the generic fields set
-  */
- public OrcProto.ColumnStatistics.Builder serialize() {
-   return OrcProto.ColumnStatistics.newBuilder()
-       .setNumberOfValues(count)
-       .setHasNull(hasNull);
- }
-
- /**
-  * Creates empty statistics of the kind matching the schema's category.
-  * Categories without a dedicated kind get the generic base statistics.
-  *
-  * @param schema the type description of the column
-  * @return a new, empty statistics object
-  */
- public static ColumnStatisticsImpl create(TypeDescription schema) {
-   final ColumnStatisticsImpl created;
-   switch (schema.getCategory()) {
-     case BOOLEAN:
-       created = new BooleanStatisticsImpl();
-       break;
-     case BYTE:
-     case SHORT:
-     case INT:
-     case LONG:
-       created = new IntegerStatisticsImpl();
-       break;
-     case FLOAT:
-     case DOUBLE:
-       created = new DoubleStatisticsImpl();
-       break;
-     case STRING:
-     case CHAR:
-     case VARCHAR:
-       created = new StringStatisticsImpl();
-       break;
-     case DECIMAL:
-       created = new DecimalStatisticsImpl();
-       break;
-     case DATE:
-       created = new DateStatisticsImpl();
-       break;
-     case TIMESTAMP:
-       created = new TimestampStatisticsImpl();
-       break;
-     case BINARY:
-       created = new BinaryStatisticsImpl();
-       break;
-     default:
-       created = new ColumnStatisticsImpl();
-       break;
-   }
-   return created;
- }
-
- /**
-  * Rebuilds a statistics object from its serialized form, choosing the kind
-  * by the first type-specific section present in the message. The sections
-  * are probed in a fixed order; a message with none of them yields the
-  * generic base statistics.
-  *
-  * @param stats the serialized column statistics
-  * @return the matching statistics object
-  */
- public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
-   if (stats.hasBucketStatistics()) {
-     return new BooleanStatisticsImpl(stats);
-   }
-   if (stats.hasIntStatistics()) {
-     return new IntegerStatisticsImpl(stats);
-   }
-   if (stats.hasDoubleStatistics()) {
-     return new DoubleStatisticsImpl(stats);
-   }
-   if (stats.hasStringStatistics()) {
-     return new StringStatisticsImpl(stats);
-   }
-   if (stats.hasDecimalStatistics()) {
-     return new DecimalStatisticsImpl(stats);
-   }
-   if (stats.hasDateStatistics()) {
-     return new DateStatisticsImpl(stats);
-   }
-   if (stats.hasTimestampStatistics()) {
-     return new TimestampStatisticsImpl(stats);
-   }
-   if (stats.hasBinaryStatistics()) {
-     return new BinaryStatisticsImpl(stats);
-   }
-   return new ColumnStatisticsImpl(stats);
- }
-}