You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/07/31 02:43:36 UTC
[36/43] hive git commit: HIVE-11253. Move SearchArgument and
VectorizedRowBatch classes to storage-api. (omalley reviewed by prasanthj)
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
deleted file mode 100644
index 3a92565..0000000
--- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.List;
-
-/**
- * The primitive predicates that form a SearchArgument.
- */
-public interface PredicateLeaf {
-
- /**
- * The possible operators for predicates. To get the opposites, construct
- * an expression with a not operator.
- */
- public static enum Operator {
- EQUALS,
- NULL_SAFE_EQUALS,
- LESS_THAN,
- LESS_THAN_EQUALS,
- IN,
- BETWEEN,
- IS_NULL
- }
-
- /**
- * The possible types for sargs.
- */
- public static enum Type {
- INTEGER(Integer.class), // all of the integer types except long
- LONG(Long.class),
- FLOAT(Double.class), // float and double
- STRING(String.class), // string, char, varchar
- DATE(Date.class),
- DECIMAL(HiveDecimalWritable.class),
- TIMESTAMP(Timestamp.class),
- BOOLEAN(Boolean.class);
-
- private final Class cls;
- Type(Class cls) {
- this.cls = cls;
- }
-
- /**
- * For all SARG leaves, the values must be the matching class.
- * @return the value class
- */
- public Class getValueClass() {
- return cls;
- }
- }
-
- /**
- * Get the operator for the leaf.
- */
- public Operator getOperator();
-
- /**
- * Get the type of the column and literal by the file format.
- */
- public Type getType();
-
- /**
- * Get the simple column name.
- * @return the column name
- */
- public String getColumnName();
-
- /**
- * Get the literal half of the predicate leaf. Adapt the original type for what orc needs
- *
- * @return an Integer, Long, Double, or String
- */
- public Object getLiteral();
-
- /**
- * For operators with multiple literals (IN and BETWEEN), get the literals.
- *
- * @return the list of literals (Integer, Longs, Doubles, or Strings)
- *
- */
- public List<Object> getLiteralList();
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
deleted file mode 100644
index bc0d503..0000000
--- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
+++ /dev/null
@@ -1,298 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import java.util.List;
-
-/**
- * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable">
- * SearchArgument</a>, which are the subset of predicates
- * that can be pushed down to the RecordReader. Each SearchArgument consists
- * of a series of SearchClauses that must each be true for the row to be
- * accepted by the filter.
- *
- * This requires that the filter be normalized into conjunctive normal form
- * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
- */
-public interface SearchArgument {
-
- /**
- * The potential result sets of logical operations.
- */
- public static enum TruthValue {
- YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL;
-
- /**
- * Compute logical or between the two values.
- * @param right the other argument or null
- * @return the result
- */
- public TruthValue or(TruthValue right) {
- if (right == null || right == this) {
- return this;
- }
- if (right == YES || this == YES) {
- return YES;
- }
- if (right == YES_NULL || this == YES_NULL) {
- return YES_NULL;
- }
- if (right == NO) {
- return this;
- }
- if (this == NO) {
- return right;
- }
- if (this == NULL) {
- if (right == NO_NULL) {
- return NULL;
- } else {
- return YES_NULL;
- }
- }
- if (right == NULL) {
- if (this == NO_NULL) {
- return NULL;
- } else {
- return YES_NULL;
- }
- }
- return YES_NO_NULL;
- }
-
- /**
- * Compute logical AND between the two values.
- * @param right the other argument or null
- * @return the result
- */
- public TruthValue and(TruthValue right) {
- if (right == null || right == this) {
- return this;
- }
- if (right == NO || this == NO) {
- return NO;
- }
- if (right == NO_NULL || this == NO_NULL) {
- return NO_NULL;
- }
- if (right == YES) {
- return this;
- }
- if (this == YES) {
- return right;
- }
- if (this == NULL) {
- if (right == YES_NULL) {
- return NULL;
- } else {
- return NO_NULL;
- }
- }
- if (right == NULL) {
- if (this == YES_NULL) {
- return NULL;
- } else {
- return NO_NULL;
- }
- }
- return YES_NO_NULL;
- }
-
- public TruthValue not() {
- switch (this) {
- case NO:
- return YES;
- case YES:
- return NO;
- case NULL:
- case YES_NO:
- case YES_NO_NULL:
- return this;
- case NO_NULL:
- return YES_NULL;
- case YES_NULL:
- return NO_NULL;
- default:
- throw new IllegalArgumentException("Unknown value: " + this);
- }
- }
-
- /**
- * Does the RecordReader need to include this set of records?
- * @return true unless none of the rows qualify
- */
- public boolean isNeeded() {
- switch (this) {
- case NO:
- case NULL:
- case NO_NULL:
- return false;
- default:
- return true;
- }
- }
- }
-
- /**
- * Get the leaf predicates that are required to evaluate the predicate. The
- * list will have the duplicates removed.
- * @return the list of leaf predicates
- */
- public List<PredicateLeaf> getLeaves();
-
- /**
- * Get the expression tree. This should only be needed for file formats that
- * need to translate the expression to an internal form.
- */
- public ExpressionTree getExpression();
-
- /**
- * Evaluate the entire predicate based on the values for the leaf predicates.
- * @param leaves the value of each leaf predicate
- * @return the value of the entire predicate
- */
- public TruthValue evaluate(TruthValue[] leaves);
-
- /**
- * Serialize the SARG as a Kryo object and return the base64 string.
- *
- * Hive should replace the current XML-based AST serialization for predicate pushdown
- * with the Kryo serialization of the SARG because the representation is much more
- * compact and focused on what is needed for predicate pushdown.
- *
- * @return the serialized SARG
- */
- public String toKryo();
-
- /**
- * A builder object for contexts outside of Hive where it isn't easy to
- * get an ExprNodeDesc. The user must call startOr, startAnd, or startNot
- * before adding any leaves.
- */
- public interface Builder {
-
- /**
- * Start building an or operation and push it on the stack.
- * @return this
- */
- public Builder startOr();
-
- /**
- * Start building an and operation and push it on the stack.
- * @return this
- */
- public Builder startAnd();
-
- /**
- * Start building a not operation and push it on the stack.
- * @return this
- */
- public Builder startNot();
-
- /**
- * Finish the current operation and pop it off of the stack. Each start
- * call must have a matching end.
- * @return this
- */
- public Builder end();
-
- /**
- * Add a less than leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param literal the literal
- * @return this
- */
- public Builder lessThan(String column, PredicateLeaf.Type type,
- Object literal);
-
- /**
- * Add a less than equals leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param literal the literal
- * @return this
- */
- public Builder lessThanEquals(String column, PredicateLeaf.Type type,
- Object literal);
-
- /**
- * Add an equals leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param literal the literal
- * @return this
- */
- public Builder equals(String column, PredicateLeaf.Type type,
- Object literal);
-
- /**
- * Add a null safe equals leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param literal the literal
- * @return this
- */
- public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
- Object literal);
-
- /**
- * Add an in leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param literal the literals
- * @return this
- */
- public Builder in(String column, PredicateLeaf.Type type,
- Object... literal);
-
- /**
- * Add an is null leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @return this
- */
- public Builder isNull(String column, PredicateLeaf.Type type);
-
- /**
- * Add a between leaf to the current item on the stack.
- * @param column the name of the column
- * @param type the type of the expression
- * @param lower the lower literal
- * @param upper the upper literal
- * @return this
- */
- public Builder between(String column, PredicateLeaf.Type type,
- Object lower, Object upper);
-
- /**
- * Add a truth value to the expression.
- * @param truth the truth value to add
- * @return this
- */
- public Builder literal(TruthValue truth);
-
- /**
- * Build and return the SearchArgument that has been defined. All of the
- * starts must have been ended before this call.
- * @return the new SearchArgument
- */
- public SearchArgument build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
deleted file mode 100644
index 0578d24..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.serde2.io;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.math.BigInteger;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
-
-public class HiveDecimalWritable implements WritableComparable<HiveDecimalWritable> {
-
- static final private Log LOG = LogFactory.getLog(HiveDecimalWritable.class);
-
- private byte[] internalStorage = new byte[0];
- private int scale;
-
- public HiveDecimalWritable() {
- }
-
- public HiveDecimalWritable(String value) {
- set(HiveDecimal.create(value));
- }
-
- public HiveDecimalWritable(byte[] bytes, int scale) {
- set(bytes, scale);
- }
-
- public HiveDecimalWritable(HiveDecimalWritable writable) {
- set(writable.getHiveDecimal());
- }
-
- public HiveDecimalWritable(HiveDecimal value) {
- set(value);
- }
-
- public HiveDecimalWritable(long value) {
- set((HiveDecimal.create(value)));
- }
-
- public void set(HiveDecimal value) {
- set(value.unscaledValue().toByteArray(), value.scale());
- }
-
- public void set(HiveDecimal value, int maxPrecision, int maxScale) {
- set(HiveDecimal.enforcePrecisionScale(value, maxPrecision, maxScale));
- }
-
- public void set(HiveDecimalWritable writable) {
- set(writable.getHiveDecimal());
- }
-
- public void set(byte[] bytes, int scale) {
- this.internalStorage = bytes;
- this.scale = scale;
- }
-
- public HiveDecimal getHiveDecimal() {
- return HiveDecimal.create(new BigInteger(internalStorage), scale);
- }
-
- /**
- * Get a HiveDecimal instance from the writable and constrain it to the given maximum precision/scale.
- *
- * @param maxPrecision maximum precision
- * @param maxScale maximum scale
- * @return HiveDecimal instance
- */
- public HiveDecimal getHiveDecimal(int maxPrecision, int maxScale) {
- return HiveDecimal.enforcePrecisionScale(HiveDecimal.
- create(new BigInteger(internalStorage), scale),
- maxPrecision, maxScale);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- scale = WritableUtils.readVInt(in);
- int byteArrayLen = WritableUtils.readVInt(in);
- if (internalStorage.length != byteArrayLen) {
- internalStorage = new byte[byteArrayLen];
- }
- in.readFully(internalStorage);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, scale);
- WritableUtils.writeVInt(out, internalStorage.length);
- out.write(internalStorage);
- }
-
- @Override
- public int compareTo(HiveDecimalWritable that) {
- return getHiveDecimal().compareTo(that.getHiveDecimal());
- }
-
- @Override
- public String toString() {
- return getHiveDecimal().toString();
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- }
- if (other == null || getClass() != other.getClass()) {
- return false;
- }
- HiveDecimalWritable bdw = (HiveDecimalWritable) other;
-
- // 'equals' and 'compareTo' are not compatible with HiveDecimals. We want
- // compareTo which returns 0 iff the numbers are equal (e.g.: 3.14 is
- // the same as 3.140). 'Equals' returns true iff equal and the same scale
- // is set in the decimals (e.g.: 3.14 is not the same as 3.140)
- return getHiveDecimal().compareTo(bdw.getHiveDecimal()) == 0;
- }
-
- @Override
- public int hashCode() {
- return getHiveDecimal().hashCode();
- }
-
- /* (non-Javadoc)
- * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
- * internal storage bytes and scale.
- * @return
- */
- public byte[] getInternalStorage() {
- return internalStorage;
- }
-
- /* (non-Javadoc)
- * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
- * internal storage bytes and scale.
- */
- public int getScale() {
- return scale;
- }
-
- public static
- HiveDecimalWritable enforcePrecisionScale(HiveDecimalWritable writable,
- int precision, int scale) {
- if (writable == null) {
- return null;
- }
-
- HiveDecimal dec =
- HiveDecimal.enforcePrecisionScale(writable.getHiveDecimal(), precision,
- scale);
- return dec == null ? null : new HiveDecimalWritable(dec);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/storage-api/pom.xml b/storage-api/pom.xml
new file mode 100644
index 0000000..71b51b8
--- /dev/null
+++ b/storage-api/pom.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive</artifactId>
+ <version>2.0.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>hive-storage-api</artifactId>
+ <packaging>jar</packaging>
+ <name>Hive Storage API</name>
+
+ <properties>
+ <hive.path.to.root>..</hive.path.to.root>
+ </properties>
+
+ <dependencies>
+ <!-- dependencies are always listed in sorted order by groupId, artifactId -->
+ <!-- inter-project -->
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ <!-- test inter-project -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <profiles>
+ <profile>
+ <id>hadoop-1</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>${hadoop-20S.version}</version>
+ <optional>true</optional>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>hadoop-2</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-23.version}</version>
+ <optional>true</optional>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+
+ <build>
+ <sourceDirectory>${basedir}/src/java</sourceDirectory>
+ <testSourceDirectory>${basedir}/src/test</testSourceDirectory>
+ <testResources>
+ <testResource>
+ <directory>${basedir}/src/test/resources</directory>
+ </testResource>
+ </testResources>
+ </build>
+</project>
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
new file mode 100644
index 0000000..7d7fb28
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
@@ -0,0 +1,312 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.RoundingMode;
+
+/**
+ *
+ * HiveDecimal. Simple wrapper for BigDecimal. Adds fixed max precision and non-scientific string
+ * representation
+ *
+ */
+public class HiveDecimal implements Comparable<HiveDecimal> {
+ public static final int MAX_PRECISION = 38;
+ public static final int MAX_SCALE = 38;
+
+ /**
+ * Default precision/scale when user doesn't specify in the column metadata, such as
+ * decimal and decimal(8).
+ */
+ public static final int USER_DEFAULT_PRECISION = 10;
+ public static final int USER_DEFAULT_SCALE = 0;
+
+ /**
+ * Default precision/scale when system is not able to determine them, such as in case
+ * of a non-generic udf.
+ */
+ public static final int SYSTEM_DEFAULT_PRECISION = 38;
+ public static final int SYSTEM_DEFAULT_SCALE = 18;
+
+ public static final HiveDecimal ZERO = new HiveDecimal(BigDecimal.ZERO);
+ public static final HiveDecimal ONE = new HiveDecimal(BigDecimal.ONE);
+
+ public static final int ROUND_FLOOR = BigDecimal.ROUND_FLOOR;
+ public static final int ROUND_CEILING = BigDecimal.ROUND_CEILING;
+ public static final int ROUND_HALF_UP = BigDecimal.ROUND_HALF_UP;
+
+ private BigDecimal bd = BigDecimal.ZERO;
+
+ private HiveDecimal(BigDecimal bd) {
+ this.bd = bd;
+ }
+
+ public static HiveDecimal create(BigDecimal b) {
+ return create(b, true);
+ }
+
+ public static HiveDecimal create(BigDecimal b, boolean allowRounding) {
+ BigDecimal bd = normalize(b, allowRounding);
+ return bd == null ? null : new HiveDecimal(bd);
+ }
+
+ public static HiveDecimal create(BigInteger unscaled, int scale) {
+ BigDecimal bd = normalize(new BigDecimal(unscaled, scale), true);
+ return bd == null ? null : new HiveDecimal(bd);
+ }
+
+ public static HiveDecimal create(String dec) {
+ BigDecimal bd;
+ try {
+ bd = new BigDecimal(dec.trim());
+ } catch (NumberFormatException ex) {
+ return null;
+ }
+
+ bd = normalize(bd, true);
+ return bd == null ? null : new HiveDecimal(bd);
+ }
+
+ public static HiveDecimal create(BigInteger bi) {
+ BigDecimal bd = normalize(new BigDecimal(bi), true);
+ return bd == null ? null : new HiveDecimal(bd);
+ }
+
+ public static HiveDecimal create(int i) {
+ return new HiveDecimal(new BigDecimal(i));
+ }
+
+ public static HiveDecimal create(long l) {
+ return new HiveDecimal(new BigDecimal(l));
+ }
+
+ @Override
+ public String toString() {
+ return bd.toPlainString();
+ }
+
+ public HiveDecimal setScale(int i) {
+ return new HiveDecimal(bd.setScale(i, RoundingMode.HALF_UP));
+ }
+
+ @Override
+ public int compareTo(HiveDecimal dec) {
+ return bd.compareTo(dec.bd);
+ }
+
+ @Override
+ public int hashCode() {
+ return bd.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || obj.getClass() != getClass()) {
+ return false;
+ }
+ return bd.equals(((HiveDecimal) obj).bd);
+ }
+
+ public int scale() {
+ return bd.scale();
+ }
+
+ /**
+ * Returns the number of digits (integer and fractional) in the number, which is equivalent
+ * to SQL decimal precision. Note that this is different from BigDecimal.precision(),
+ * which returns the precision of the unscaled value (BigDecimal.valueOf(0.01).precision() = 1,
+ * whereas HiveDecimal.create("0.01").precision() = 2).
+ * If you want the BigDecimal precision, use HiveDecimal.bigDecimalValue().precision()
+ * @return the BigDecimal precision, or the scale when the scale is larger
+ */
+ public int precision() {
+ int bdPrecision = bd.precision();
+ int bdScale = bd.scale();
+
+ if (bdPrecision < bdScale) {
+ // This can happen for numbers less than 0.1
+ // For 0.001234: bdPrecision=4, bdScale=6
+ // In this case, we'll set the type to have the same precision as the scale.
+ return bdScale;
+ }
+ return bdPrecision;
+ }
+
+ public int intValue() {
+ return bd.intValue();
+ }
+
+ public double doubleValue() {
+ return bd.doubleValue();
+ }
+
+ public long longValue() {
+ return bd.longValue();
+ }
+
+ public short shortValue() {
+ return bd.shortValue();
+ }
+
+ public float floatValue() {
+ return bd.floatValue();
+ }
+
+ public BigDecimal bigDecimalValue() {
+ return bd;
+ }
+
+ public byte byteValue() {
+ return bd.byteValue();
+ }
+
+ public HiveDecimal setScale(int adjustedScale, int rm) {
+ return create(bd.setScale(adjustedScale, rm));
+ }
+
+ public HiveDecimal subtract(HiveDecimal dec) {
+ return create(bd.subtract(dec.bd));
+ }
+
+ public HiveDecimal multiply(HiveDecimal dec) {
+ return create(bd.multiply(dec.bd), false);
+ }
+
+ public BigInteger unscaledValue() {
+ return bd.unscaledValue();
+ }
+
+ public HiveDecimal scaleByPowerOfTen(int n) {
+ return create(bd.scaleByPowerOfTen(n));
+ }
+
+ public HiveDecimal abs() {
+ return create(bd.abs());
+ }
+
+ public HiveDecimal negate() {
+ return create(bd.negate());
+ }
+
+ public HiveDecimal add(HiveDecimal dec) {
+ return create(bd.add(dec.bd));
+ }
+
+ public HiveDecimal pow(int n) {
+ BigDecimal result = normalize(bd.pow(n), false);
+ return result == null ? null : new HiveDecimal(result);
+ }
+
+ public HiveDecimal remainder(HiveDecimal dec) {
+ return create(bd.remainder(dec.bd));
+ }
+
+ public HiveDecimal divide(HiveDecimal dec) {
+ return create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true);
+ }
+
+ /**
+ * Get the sign of the underlying decimal.
+ * @return 0 if the decimal is equal to 0, -1 if less than zero, and 1 if greater than 0
+ */
+ public int signum() {
+ return bd.signum();
+ }
+
+ private static BigDecimal trim(BigDecimal d) {
+ if (d.compareTo(BigDecimal.ZERO) == 0) {
+ // Special case for 0, because java doesn't strip zeros correctly on that number.
+ d = BigDecimal.ZERO;
+ } else {
+ d = d.stripTrailingZeros();
+ if (d.scale() < 0) {
+ // no negative scale decimals
+ d = d.setScale(0);
+ }
+ }
+ return d;
+ }
+
+ private static BigDecimal normalize(BigDecimal bd, boolean allowRounding) {
+ if (bd == null) {
+ return null;
+ }
+
+ bd = trim(bd);
+
+ int intDigits = bd.precision() - bd.scale();
+
+ if (intDigits > MAX_PRECISION) {
+ return null;
+ }
+
+ int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale()));
+ if (bd.scale() > maxScale ) {
+ if (allowRounding) {
+ bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
+ // Trimming is again necessary, because rounding may introduce new trailing 0's.
+ bd = trim(bd);
+ } else {
+ bd = null;
+ }
+ }
+
+ return bd;
+ }
+
+ public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, int maxScale) {
+ if (bd == null) {
+ return null;
+ }
+
+ bd = trim(bd);
+
+ if (bd.scale() > maxScale) {
+ bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
+ }
+
+ int maxIntDigits = maxPrecision - maxScale;
+ int intDigits = bd.precision() - bd.scale();
+ if (intDigits > maxIntDigits) {
+ return null;
+ }
+
+ return bd;
+ }
+
+ public static HiveDecimal enforcePrecisionScale(HiveDecimal dec, int maxPrecision, int maxScale) {
+ if (dec == null) {
+ return null;
+ }
+
+ // Minor optimization, avoiding creating new objects.
+ if (dec.precision() - dec.scale() <= maxPrecision - maxScale &&
+ dec.scale() <= maxScale) {
+ return dec;
+ }
+
+ BigDecimal bd = enforcePrecisionScale(dec.bd, maxPrecision, maxScale);
+ if (bd == null) {
+ return null;
+ }
+
+ return HiveDecimal.create(bd);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
new file mode 100644
index 0000000..02c52fa
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -0,0 +1,322 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * This class supports string and binary data by value reference -- i.e. each field is
+ * explicitly present, as opposed to provided by a dictionary reference.
+ * In some cases, all the values will be in the same byte array to begin with,
+ * but this need not be the case. If each value is in a separate byte
+ * array to start with, or not all of the values are in the same original
+ * byte array, you can still assign data by reference into this column vector.
+ * This gives flexibility to use this in multiple situations.
+ * <p>
+ * When setting data by reference, the caller
+ * is responsible for allocating the byte arrays used to hold the data.
+ * You can also set data by value, as long as you call the initBuffer() method first.
+ * You can mix "by value" and "by reference" in the same column vector,
+ * though that use is probably not typical.
+ */
+ public class BytesColumnVector extends ColumnVector {
+   // Byte array holding each field; either a caller-owned array (setRef)
+   // or the internal buffer (setVal / setConcat).
+   public byte[][] vector;
+   public int[] start; // start offset of each field
+
+   /*
+    * The length of each field. If the value repeats for every entry, then it is stored
+    * in vector[0] and isRepeating from the superclass is set to true.
+    */
+   public int[] length;
+   private byte[] buffer; // optional buffer to use when actually copying in data
+   private int nextFree; // next free position in buffer
+
+   // Estimate that there will be 16 bytes per entry
+   static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE;
+
+   // Proportion of extra space to provide when allocating more buffer space.
+   static final float EXTRA_SPACE_FACTOR = (float) 1.2;
+
+   /**
+    * Use this constructor for normal operation.
+    * All column vectors should be the default size normally.
+    */
+   public BytesColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Don't call this constructor except for testing purposes.
+    *
+    * @param size number of elements in the column vector
+    */
+   public BytesColumnVector(int size) {
+     super(size);
+     vector = new byte[size][];
+     start = new int[size];
+     length = new int[size];
+   }
+
+   /**
+    * Additional reset work for BytesColumnVector (releasing scratch bytes for by value strings).
+    * The internal buffer is kept; only the next-free position is rewound.
+    */
+   @Override
+   public void reset() {
+     super.reset();
+     initBuffer(0);
+   }
+
+   /** Set a field by reference.
+    *
+    * @param elementNum index within column vector to set
+    * @param sourceBuf container of source data
+    * @param start start byte position within source
+    * @param length length of source byte sequence
+    */
+   public void setRef(int elementNum, byte[] sourceBuf, int start, int length) {
+     vector[elementNum] = sourceBuf;
+     this.start[elementNum] = start;
+     this.length[elementNum] = length;
+   }
+
+   /**
+    * You must call initBuffer first before using setVal().
+    * Provide the estimated number of bytes needed to hold
+    * a full column vector worth of byte string data.
+    *
+    * @param estimatedValueSize Estimated size of buffer space needed
+    */
+   public void initBuffer(int estimatedValueSize) {
+     nextFree = 0;
+
+     // if buffer is already allocated, keep using it, don't re-allocate
+     if (buffer != null) {
+       return;
+     }
+
+     // allocate a little extra space to limit need to re-allocate
+     int bufferSize = this.vector.length * (int)(estimatedValueSize * EXTRA_SPACE_FACTOR);
+     if (bufferSize < DEFAULT_BUFFER_SIZE) {
+       bufferSize = DEFAULT_BUFFER_SIZE;
+     }
+     buffer = new byte[bufferSize];
+   }
+
+   /**
+    * Initialize buffer to default size.
+    */
+   public void initBuffer() {
+     initBuffer(0);
+   }
+
+   /**
+    * @return amount of buffer space currently allocated
+    */
+   public int bufferSize() {
+     if (buffer == null) {
+       return 0;
+     }
+     return buffer.length;
+   }
+
+   /**
+    * Set a field by actually copying in to a local buffer.
+    * If you must actually copy data in to the array, use this method.
+    * DO NOT USE this method unless it's not practical to set data by reference with setRef().
+    * Setting data by reference tends to run a lot faster than copying data in.
+    *
+    * @param elementNum index within column vector to set
+    * @param sourceBuf container of source data
+    * @param start start byte position within source
+    * @param length length of source byte sequence
+    */
+   public void setVal(int elementNum, byte[] sourceBuf, int start, int length) {
+     if ((nextFree + length) > buffer.length) {
+       increaseBufferSpace(length);
+     }
+     System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+     vector[elementNum] = buffer;
+     this.start[elementNum] = nextFree;
+     this.length[elementNum] = length;
+     nextFree += length;
+   }
+
+   /**
+    * Set a field to the concatenation of two string values. Result data is copied
+    * into the internal buffer.
+    *
+    * @param elementNum index within column vector to set
+    * @param leftSourceBuf container of left argument
+    * @param leftStart start of left argument
+    * @param leftLen length of left argument
+    * @param rightSourceBuf container of right argument
+    * @param rightStart start of right argument
+    * @param rightLen length of right argument
+    */
+   public void setConcat(int elementNum, byte[] leftSourceBuf, int leftStart, int leftLen,
+       byte[] rightSourceBuf, int rightStart, int rightLen) {
+     int newLen = leftLen + rightLen;
+     if ((nextFree + newLen) > buffer.length) {
+       increaseBufferSpace(newLen);
+     }
+     vector[elementNum] = buffer;
+     this.start[elementNum] = nextFree;
+     this.length[elementNum] = newLen;
+
+     System.arraycopy(leftSourceBuf, leftStart, buffer, nextFree, leftLen);
+     nextFree += leftLen;
+     System.arraycopy(rightSourceBuf, rightStart, buffer, nextFree, rightLen);
+     nextFree += rightLen;
+   }
+
+   /**
+    * Increase buffer space enough to accommodate next element.
+    * This uses an exponential increase mechanism to rapidly
+    * increase buffer size to enough to hold all data.
+    * As batches get re-loaded, buffer space allocated will quickly
+    * stabilize.
+    *
+    * @param nextElemLength size of next element to be added
+    * @throws IllegalStateException if the required space does not fit in a Java array
+    */
+   public void increaseBufferSpace(int nextElemLength) {
+
+     // Largest array size that is safe to request (some VMs reserve header words).
+     final int maxArraySize = Integer.MAX_VALUE - 8;
+
+     // Do the size arithmetic in long. With int arithmetic, doubling a buffer of
+     // 2^30 bytes or more wraps negative and then to zero, after which the loop
+     // below could never terminate.
+     long required = (long) nextFree + nextElemLength;
+
+     // Keep doubling buffer size until there will be enough space for next element.
+     long newLength = 2L * buffer.length;
+     while (required > newLength) {
+       newLength *= 2;
+     }
+
+     if (newLength > maxArraySize) {
+       if (required > maxArraySize) {
+         throw new IllegalStateException(
+             "Cannot grow buffer to " + required + " bytes; exceeds maximum array size");
+       }
+       // Doubling overshoots the array limit, but the data itself still fits.
+       newLength = maxArraySize;
+     }
+
+     // Allocate new buffer, copy data to it, and set buffer to new buffer.
+     // Entries set earlier keep referencing the old array through vector[],
+     // so their contents stay valid.
+     byte[] newBuffer = new byte[(int) newLength];
+     System.arraycopy(buffer, 0, newBuffer, 0, nextFree);
+     buffer = newBuffer;
+   }
+
+   /** Copy the current object contents into the output. Only copy selected entries,
+    * as indicated by selectedInUse and the sel array. Values are copied by value
+    * (setVal) into the output's internal buffer.
+    */
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, BytesColumnVector output) {
+
+     // Output has nulls if and only if input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Handle repeating case
+     if (isRepeating) {
+       output.setVal(0, vector[0], start[0], length[0]);
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Handle normal case
+
+     // Copy data values over
+     if (selectedInUse) {
+       for (int j = 0; j < size; j++) {
+         int i = sel[j];
+         output.setVal(i, vector[i], start[i], length[i]);
+       }
+     }
+     else {
+       for (int i = 0; i < size; i++) {
+         output.setVal(i, vector[i], start[i], length[i]);
+       }
+     }
+
+     // Copy nulls over if needed
+     if (!noNulls) {
+       if (selectedInUse) {
+         for (int j = 0; j < size; j++) {
+           int i = sel[j];
+           output.isNull[i] = isNull[i];
+         }
+       }
+       else {
+         System.arraycopy(isNull, 0, output.isNull, 0, size);
+       }
+     }
+   }
+
+   /** Simplify vector by brute-force flattening noNulls and isRepeating
+    * This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+    * with many arguments, at the expense of loss of some performance.
+    */
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+
+       // setRef is used below and this is safe, because the reference
+       // is to data owned by this column vector. If this column vector
+       // gets re-used, the whole thing is re-used together so there
+       // is no danger of a dangling reference.
+
+       // Only copy data values if entry is not null. The string value
+       // at position 0 is undefined if the position 0 value is null.
+       if (noNulls || !isNull[0]) {
+
+         // loops start at position 1 because position 0 is already set
+         if (selectedInUse) {
+           for (int j = 1; j < size; j++) {
+             int i = sel[j];
+             this.setRef(i, vector[0], start[0], length[0]);
+           }
+         } else {
+           for (int i = 1; i < size; i++) {
+             this.setRef(i, vector[0], start[0], length[0]);
+           }
+         }
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   // Fill all the vector entries with the provided value (by reference, as a repeating vector)
+   public void fill(byte[] value) {
+     noNulls = true;
+     isRepeating = true;
+     setRef(0, value, 0, value.length);
+   }
+
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     BytesColumnVector in = (BytesColumnVector) inputVector;
+     setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]);
+   }
+
+   @Override
+   public void init() {
+     initBuffer(0);
+   }
+
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     if (isRepeating) {
+       row = 0;
+     }
+     if (noNulls || !isNull[row]) {
+       buffer.append('"');
+       // Read from the array this entry actually points at. The internal scratch
+       // buffer is only correct for values stored with setVal; it is wrong for
+       // by-reference values and is null when initBuffer was never called.
+       buffer.append(new String(vector[row], start[row], length[row]));
+       buffer.append('"');
+     } else {
+       buffer.append("null");
+     }
+   }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
new file mode 100644
index 0000000..cb75c2c
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * ColumnVector contains the shared structure for the sub-types,
+ * including NULL information, and whether this vector
+ * repeats, i.e. has all values the same, so only the first
+ * one is set. This is used to accelerate query performance
+ * by handling a whole vector in O(1) time when applicable.
+ *
+ * The fields are public by design since this is a performance-critical
+ * structure that is used in the inner loop of query execution.
+ */
+ public abstract class ColumnVector {
+
+   /*
+    * The current kinds of column vectors.
+    */
+   public static enum Type {
+     LONG,
+     DOUBLE,
+     BYTES,
+     DECIMAL
+   }
+
+   /*
+    * When noNulls is false, an entry of this array is true if the corresponding
+    * value is null, otherwise false. The array is always allocated, so a batch
+    * can be re-used later and nulls added.
+    */
+   public boolean[] isNull;
+
+   // True when the whole column vector has no nulls, otherwise false.
+   public boolean noNulls;
+
+   /*
+    * True if the same value repeats for the whole column vector.
+    * If so, vector[0] holds the repeating value.
+    */
+   public boolean isRepeating;
+
+   // State captured by flattenPush() so that unFlatten() can restore it.
+   private boolean preFlattenIsRepeating;
+   private boolean preFlattenNoNulls;
+
+   /**
+    * Constructor for super-class ColumnVector. This is not called directly,
+    * but used to initialize inherited fields.
+    *
+    * @param len Vector length
+    */
+   public ColumnVector(int len) {
+     isRepeating = false;
+     noNulls = true;
+     isNull = new boolean[len];
+   }
+
+   /**
+    * Puts the column back into its default state:
+    * - the isNull array holds only false
+    * - noNulls is true
+    * - isRepeating is false
+    */
+   public void reset() {
+     // The array only needs clearing if nulls may have been recorded.
+     if (!noNulls) {
+       Arrays.fill(isNull, false);
+     }
+     isRepeating = false;
+     noNulls = true;
+   }
+
+   public abstract void flatten(boolean selectedInUse, int[] sel, int size);
+
+   // Makes the null flag of a repeating vector explicit for every row in play.
+   // This can be used to reduce combinatorial explosion of code paths in
+   // VectorExpressions with many arguments.
+   public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) {
+
+     // Without nulls every row is non-null; otherwise entry 0 speaks for all rows.
+     boolean fillWith = !noNulls && isNull[0];
+
+     if (!selectedInUse) {
+       Arrays.fill(isNull, 0, size, fillWith);
+     } else {
+       for (int j = 0; j < size; j++) {
+         isNull[sel[j]] = fillWith;
+       }
+     }
+
+     // all nulls are now explicit
+     noNulls = false;
+   }
+
+   // Writes explicit "not null" flags for the rows in play when the vector was
+   // marked as having no nulls, and clears the noNulls indicator.
+   public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) {
+     if (!noNulls) {
+       return;
+     }
+     noNulls = false;
+     if (!selectedInUse) {
+       Arrays.fill(isNull, 0, size, false);
+       return;
+     }
+     for (int j = 0; j < size; j++) {
+       isNull[sel[j]] = false;
+     }
+   }
+
+   /**
+    * Restore the state of isRepeating and noNulls to what it was
+    * before flattening. This must only be called just after flattening
+    * and then evaluating a VectorExpression on the column vector.
+    * It is an optimization that allows other operations on the same
+    * column to continue to benefit from the isRepeating and noNulls
+    * indicators.
+    */
+   public void unFlatten() {
+     noNulls = preFlattenNoNulls;
+     isRepeating = preFlattenIsRepeating;
+   }
+
+   // Record repeating and no nulls state to be restored later.
+   protected void flattenPush() {
+     preFlattenNoNulls = noNulls;
+     preFlattenIsRepeating = isRepeating;
+   }
+
+   /**
+    * Set the element in this column vector from the given input vector.
+    *
+    * @param outElementNum index in this vector to write
+    * @param inputElementNum index in the input vector to read
+    * @param inputVector the vector to read from
+    */
+   public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);
+
+   /**
+    * Initialize the column vector. This method can be overridden by specific column vector types.
+    * Use this method only if the individual type of the column vector is not known, otherwise it's
+    * preferable to call specific initialization methods.
+    */
+   public void init() {
+     // Do nothing by default
+   }
+
+   /**
+    * Print the value for this column into the given string builder.
+    * @param buffer the buffer to print into
+    * @param row the id of the row to print
+    */
+   public abstract void stringifyValue(StringBuilder buffer,
+       int row);
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
new file mode 100644
index 0000000..74a9d5f
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+ public class DecimalColumnVector extends ColumnVector {
+
+   /**
+    * A vector of HiveDecimalWritable objects.
+    *
+    * For high performance and easy access to this low-level structure,
+    * the fields are public by design (as they are in other ColumnVector
+    * types).
+    */
+   public HiveDecimalWritable[] vector;
+   public short scale;
+   public short precision;
+
+   /**
+    * Creates a vector of the default batch size.
+    *
+    * @param precision maximum number of digits of the values
+    * @param scale number of fractional digits of the values
+    */
+   public DecimalColumnVector(int precision, int scale) {
+     this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+   }
+
+   /**
+    * Creates a vector with every entry pre-allocated and set to zero.
+    *
+    * @param size number of elements in the column vector
+    * @param precision maximum number of digits of the values
+    * @param scale number of fractional digits of the values
+    */
+   public DecimalColumnVector(int size, int precision, int scale) {
+     super(size);
+     this.precision = (short) precision;
+     this.scale = (short) scale;
+     vector = new HiveDecimalWritable[size];
+     for (int i = 0; i < size; i++) {
+       vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+     }
+   }
+
+   /**
+    * Simplify vector by brute-force flattening noNulls and isRepeating, in the
+    * same way as the other column vector types do.
+    *
+    * The pre-flatten state is recorded first; without that, a later unFlatten()
+    * would restore stale indicator values.
+    */
+   @Override
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+       HiveDecimalWritable repeat = vector[0];
+       // Copy the value into each slot's own writable; the slots are
+       // pre-allocated by the constructor and must not be aliased.
+       if (selectedInUse) {
+         for (int j = 0; j < size; j++) {
+           int i = sel[j];
+           vector[i].set(repeat);
+         }
+       } else {
+         for (int i = 0; i < size; i++) {
+           vector[i].set(repeat);
+         }
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   /**
+    * Copies one element from another DecimalColumnVector, enforcing this vector's
+    * precision and scale. The element is marked null if the value does not fit.
+    */
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale);
+     if (hiveDec == null) {
+       noNulls = false;
+       isNull[outElementNum] = true;
+     } else {
+       vector[outElementNum].set(hiveDec);
+     }
+   }
+
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     if (isRepeating) {
+       row = 0;
+     }
+     if (noNulls || !isNull[row]) {
+       buffer.append(vector[row].toString());
+     } else {
+       buffer.append("null");
+     }
+   }
+
+   /**
+    * Sets an element from a writable, enforcing this vector's precision and scale.
+    * The element is marked null if the value does not fit.
+    *
+    * @param elementNum index within column vector to set
+    * @param writeable the value to store
+    */
+   public void set(int elementNum, HiveDecimalWritable writeable) {
+     HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
+     if (hiveDec == null) {
+       noNulls = false;
+       isNull[elementNum] = true;
+     } else {
+       vector[elementNum].set(hiveDec);
+     }
+   }
+
+   /**
+    * Sets an element from a HiveDecimal, enforcing this vector's precision and scale.
+    * The element is marked null if the value is null or does not fit.
+    *
+    * @param elementNum index within column vector to set
+    * @param hiveDec the value to store
+    */
+   public void set(int elementNum, HiveDecimal hiveDec) {
+     HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
+     if (checkedDec == null) {
+       noNulls = false;
+       isNull[elementNum] = true;
+     } else {
+       vector[elementNum].set(checkedDec);
+     }
+   }
+
+   /**
+    * Stores a placeholder data value in the given slot. The null flags are not
+    * touched here.
+    *
+    * @param elementNum index within column vector to set
+    */
+   public void setNullDataValue(int elementNum) {
+     // E.g. For scale 2 the minimum is "0.01"
+     HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
+     vector[elementNum].set(minimumNonZeroValue);
+   }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
new file mode 100644
index 0000000..4a7811d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable double precision floating point column vector.
+ * This class will be used for operations on all floating point types (float, double)
+ * and as such will use a 64-bit double value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+ public class DoubleColumnVector extends ColumnVector {
+   public double[] vector;
+   public static final double NULL_VALUE = Double.NaN;
+
+   /**
+    * Use this constructor by default. All column vectors
+    * should normally be the default size.
+    */
+   public DoubleColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Don't use this except for testing purposes.
+    *
+    * @param len the number of rows
+    */
+   public DoubleColumnVector(int len) {
+     super(len);
+     vector = new double[len];
+   }
+
+   // Copies this vector's contents into the output. Only the entries in play are
+   // copied, as indicated by selectedInUse and the sel array.
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+     // The output has nulls exactly when the input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Repeating input: entry 0 carries everything.
+     if (isRepeating) {
+       output.vector[0] = vector[0];
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Data values first.
+     if (!selectedInUse) {
+       System.arraycopy(vector, 0, output.vector, 0, size);
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.vector[row] = vector[row];
+       }
+     }
+
+     // Null flags are only copied when there may be nulls.
+     if (noNulls) {
+       return;
+     }
+     if (!selectedInUse) {
+       System.arraycopy(isNull, 0, output.isNull, 0, size);
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.isNull[row] = isNull[row];
+       }
+     }
+   }
+
+   // Turns this into a repeating, null-free vector holding the provided value.
+   public void fill(double value) {
+     vector[0] = value;
+     isRepeating = true;
+     noNulls = true;
+   }
+
+   // Simplify vector by brute-force flattening noNulls and isRepeating.
+   // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+   // with many arguments.
+   @Override
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+       double repeated = vector[0];
+       if (!selectedInUse) {
+         Arrays.fill(vector, 0, size, repeated);
+       } else {
+         for (int j = 0; j < size; j++) {
+           vector[sel[j]] = repeated;
+         }
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     DoubleColumnVector source = (DoubleColumnVector) inputVector;
+     vector[outElementNum] = source.vector[inputElementNum];
+   }
+
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     // A repeating vector keeps its only value (and null flag) at index 0.
+     int index = isRepeating ? 0 : row;
+     if (!noNulls && isNull[index]) {
+       buffer.append("null");
+     } else {
+       buffer.append(vector[index]);
+     }
+   }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
new file mode 100644
index 0000000..5702584
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable int column vector.
+ * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
+ * and as such will use a 64-bit long value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller int types will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+ public class LongColumnVector extends ColumnVector {
+   public long[] vector;
+   public static final long NULL_VALUE = 1;
+
+   /**
+    * Use this constructor by default. All column vectors
+    * should normally be the default size.
+    */
+   public LongColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Don't use this except for testing purposes.
+    *
+    * @param len the number of rows
+    */
+   public LongColumnVector(int len) {
+     super(len);
+     vector = new long[len];
+   }
+
+   // Copies this vector's contents into the output. Only the entries in play are
+   // copied, as indicated by selectedInUse and the sel array.
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+     // The output has nulls exactly when the input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Repeating input: entry 0 carries everything.
+     if (isRepeating) {
+       output.vector[0] = vector[0];
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Data values first.
+     if (!selectedInUse) {
+       System.arraycopy(vector, 0, output.vector, 0, size);
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.vector[row] = vector[row];
+       }
+     }
+
+     // Null flags are only copied when there may be nulls.
+     if (noNulls) {
+       return;
+     }
+     if (!selectedInUse) {
+       System.arraycopy(isNull, 0, output.isNull, 0, size);
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.isNull[row] = isNull[row];
+       }
+     }
+   }
+
+   // Same as above, but the output is a double vector; each long value is
+   // widened to double as it is stored.
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+     // The output has nulls exactly when the input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Repeating input: entry 0 carries everything.
+     if (isRepeating) {
+       output.vector[0] = vector[0]; // automatic conversion to double is done here
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Data values first; an element-wise loop is needed because of the type conversion.
+     if (!selectedInUse) {
+       for (int row = 0; row < size; ++row) {
+         output.vector[row] = vector[row];
+       }
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.vector[row] = vector[row];
+       }
+     }
+
+     // Null flags are only copied when there may be nulls.
+     if (noNulls) {
+       return;
+     }
+     if (!selectedInUse) {
+       System.arraycopy(isNull, 0, output.isNull, 0, size);
+     } else {
+       for (int j = 0; j < size; j++) {
+         int row = sel[j];
+         output.isNull[row] = isNull[row];
+       }
+     }
+   }
+
+   // Turns this into a repeating, null-free vector holding the provided value.
+   public void fill(long value) {
+     vector[0] = value;
+     isRepeating = true;
+     noNulls = true;
+   }
+
+   // Simplify vector by brute-force flattening noNulls and isRepeating.
+   // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+   // with many arguments.
+   @Override
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+       long repeated = vector[0];
+       if (!selectedInUse) {
+         Arrays.fill(vector, 0, size, repeated);
+       } else {
+         for (int j = 0; j < size; j++) {
+           vector[sel[j]] = repeated;
+         }
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     LongColumnVector source = (LongColumnVector) inputVector;
+     vector[outElementNum] = source.vector[inputElementNum];
+   }
+
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     // A repeating vector keeps its only value (and null flag) at index 0.
+     int index = isRepeating ? 0 : row;
+     if (!noNulls && isNull[index]) {
+       buffer.append("null");
+     } else {
+       buffer.append(vector[index]);
+     }
+   }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
new file mode 100644
index 0000000..7c18da6
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A VectorizedRowBatch is a set of rows, organized with each column
+ * as a vector. It is the unit of query execution, organized to minimize
+ * the cost per row and achieve high cycles-per-instruction.
+ * The major fields are public by design to allow fast and convenient
+ * access by the vectorized query execution code.
+ */
+public class VectorizedRowBatch implements Writable {
+ public int numCols; // number of columns
+ public ColumnVector[] cols; // a vector for each column
+ public int size; // number of rows that qualify (i.e. haven't been filtered out)
+ public int[] selected; // array of positions of selected values
+ public int[] projectedColumns;
+ public int projectionSize;
+
+ /*
+ * If no filtering has been applied yet, selectedInUse is false,
+ * meaning that all rows qualify. If it is true, then the selected[] array
+ * records the offsets of qualifying rows.
+ */
+ public boolean selectedInUse;
+
+ // If this is true, then there is no data in the batch -- we have hit the end of input.
+ public boolean endOfFile;
+
+ /*
+ * This number is carefully chosen to minimize overhead and typically allows
+ * one VectorizedRowBatch to fit in cache.
+ */
+ public static final int DEFAULT_SIZE = 1024;
+
+ /**
+ * Return a batch with the specified number of columns.
+ * This is the standard constructor -- all batches should be the same size
+ *
+ * @param numCols the number of columns to include in the batch
+ */
+ public VectorizedRowBatch(int numCols) {
+ this(numCols, DEFAULT_SIZE);
+ }
+
+ /**
+ * Return a batch with the specified number of columns and rows.
+ * Only call this constructor directly for testing purposes.
+ * Batch size should normally always be defaultSize.
+ *
+ * @param numCols the number of columns to include in the batch
+ * @param size the number of rows to include in the batch
+ */
+ public VectorizedRowBatch(int numCols, int size) {
+ this.numCols = numCols;
+ this.size = size;
+ selected = new int[size];
+ selectedInUse = false;
+ this.cols = new ColumnVector[numCols];
+ projectedColumns = new int[numCols];
+
+ // Initially all columns are projected and in the same order
+ projectionSize = numCols;
+ for (int i = 0; i < numCols; i++) {
+ projectedColumns[i] = i;
+ }
+ }
+
+ /**
+ * Returns the maximum size of the batch (number of rows it can hold)
+ */
+ public int getMaxSize() {
+ return selected.length;
+ }
+
+ /**
+ * Return count of qualifying rows.
+ *
+ * @return number of rows that have not been filtered out
+ */
+ public long count() {
+ return size;
+ }
+
+ private static String toUTF8(Object o) {
+ if(o == null || o instanceof NullWritable) {
+ return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
+ }
+ return o.toString();
+ }
+
+ @Override
+ public String toString() {
+ if (size == 0) {
+ return "";
+ }
+ StringBuilder b = new StringBuilder();
+ if (this.selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = selected[j];
+ b.append('[');
+ for (int k = 0; k < projectionSize; k++) {
+ int projIndex = projectedColumns[k];
+ ColumnVector cv = cols[projIndex];
+ if (k > 0) {
+ b.append(", ");
+ }
+ cv.stringifyValue(b, i);
+ }
+ b.append(']');
+ if (j < size - 1) {
+ b.append('\n');
+ }
+ }
+ } else {
+ for (int i = 0; i < size; i++) {
+ b.append('[');
+ for (int k = 0; k < projectionSize; k++) {
+ int projIndex = projectedColumns[k];
+ ColumnVector cv = cols[projIndex];
+ if (k > 0) {
+ b.append(", ");
+ }
+ cv.stringifyValue(b, i);
+ }
+ b.append(']');
+ if (i < size - 1) {
+ b.append('\n');
+ }
+ }
+ }
+ return b.toString();
+ }
+
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ throw new UnsupportedOperationException("Do you really need me?");
+ }
+
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ throw new UnsupportedOperationException("Don't call me");
+ }
+
+ /**
+ * Resets the row batch to default state
+ * - sets selectedInUse to false
+ * - sets size to 0
+ * - sets endOfFile to false
+ * - resets each column
+ * - inits each column
+ */
+ public void reset() {
+ selectedInUse = false;
+ size = 0;
+ endOfFile = false;
+ for (ColumnVector vc : cols) {
+ if (vc != null) {
+ vc.reset();
+ vc.init();
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
new file mode 100644
index 0000000..577d95d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * The inner representation of the SearchArgument. Most users should not
+ * need this class; it is only for file formats that need to translate
+ * the SearchArgument into an internal form.
+ */
+public class ExpressionTree {
+  /** The kind of node: a boolean connective, a leaf reference or a constant. */
+  public enum Operator {OR, AND, NOT, LEAF, CONSTANT}
+
+  private final Operator operator;
+  private final List<ExpressionTree> children;
+  private final int leaf;
+  private final SearchArgument.TruthValue constant;
+
+  /**
+   * Creates a node with no operator, no children, leaf 0 and no constant.
+   * NOTE(review): presumably only here for serialization frameworks --
+   * confirm against the callers.
+   */
+  ExpressionTree() {
+    this.operator = null;
+    this.children = null;
+    this.leaf = 0;
+    this.constant = null;
+  }
+
+  /**
+   * Creates an operator node whose children are the given trees, in order.
+   * The leaf index is set to -1 and there is no constant.
+   */
+  ExpressionTree(Operator op, ExpressionTree... kids) {
+    this.operator = op;
+    this.children = new ArrayList<ExpressionTree>();
+    this.leaf = -1;
+    this.constant = null;
+    for (ExpressionTree kid : kids) {
+      this.children.add(kid);
+    }
+  }
+
+  /**
+   * Creates a LEAF node that refers to the leaf with the given index.
+   */
+  ExpressionTree(int leaf) {
+    this.operator = Operator.LEAF;
+    this.children = null;
+    this.leaf = leaf;
+    this.constant = null;
+  }
+
+  /**
+   * Creates a CONSTANT node holding the given truth value.
+   */
+  ExpressionTree(SearchArgument.TruthValue constant) {
+    this.operator = Operator.CONSTANT;
+    this.children = null;
+    this.leaf = -1;
+    this.constant = constant;
+  }
+
+  /**
+   * Creates a deep copy: the child list and every descendant are copied
+   * recursively.
+   */
+  ExpressionTree(ExpressionTree other) {
+    this.operator = other.operator;
+    if (other.children != null) {
+      this.children = new ArrayList<ExpressionTree>();
+      for (ExpressionTree original : other.children) {
+        this.children.add(new ExpressionTree(original));
+      }
+    } else {
+      this.children = null;
+    }
+    this.leaf = other.leaf;
+    this.constant = other.constant;
+  }
+
+  /**
+   * Evaluates this tree given the truth value of every leaf.
+   *
+   * @param leaves the truth values, indexed by leaf number
+   * @return the combined truth value; null for an OR or AND node that has
+   *   no children
+   */
+  public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves
+                                            ) {
+    switch (operator) {
+      case OR: {
+        SearchArgument.TruthValue combined = null;
+        for (ExpressionTree kid : children) {
+          combined = kid.evaluate(leaves).or(combined);
+        }
+        return combined;
+      }
+      case AND: {
+        SearchArgument.TruthValue combined = null;
+        for (ExpressionTree kid : children) {
+          combined = kid.evaluate(leaves).and(combined);
+        }
+        return combined;
+      }
+      case NOT: {
+        ExpressionTree only = children.get(0);
+        return only.evaluate(leaves).not();
+      }
+      case LEAF:
+        return leaves[leaf];
+      case CONSTANT:
+        return constant;
+      default:
+        throw new IllegalStateException("Unknown operator: " + operator);
+    }
+  }
+
+  /**
+   * Renders the tree in prefix form, for example
+   * <code>(and leaf-0 (not leaf-1))</code>.
+   */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder();
+    switch (operator) {
+      case OR:
+        appendWithChildren(text, "(or");
+        break;
+      case AND:
+        appendWithChildren(text, "(and");
+        break;
+      case NOT:
+        text.append("(not ").append(children.get(0)).append(')');
+        break;
+      case LEAF:
+        text.append("leaf-").append(leaf);
+        break;
+      case CONSTANT:
+        text.append(constant);
+        break;
+    }
+    return text.toString();
+  }
+
+  /**
+   * Appends the opening text followed by each child, space separated, and
+   * a closing parenthesis.
+   */
+  private void appendWithChildren(StringBuilder text, String opening) {
+    text.append(opening);
+    for (ExpressionTree kid : children) {
+      text.append(' ').append(kid.toString());
+    }
+    text.append(')');
+  }
+
+  /** @return the operator of this node */
+  public Operator getOperator() {
+    return this.operator;
+  }
+
+  /** @return the list of children held by this node, or null if it has none */
+  public List<ExpressionTree> getChildren() {
+    return this.children;
+  }
+
+  /** @return the constant of this node, or null if it is not a CONSTANT node */
+  public SearchArgument.TruthValue getConstant() {
+    return this.constant;
+  }
+
+  /** @return the leaf index stored in this node */
+  public int getLeaf() {
+    return this.leaf;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
new file mode 100644
index 0000000..3a92565
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.List;
+
+/**
+ * The primitive predicates that form a SearchArgument.
+ */
+public interface PredicateLeaf {
+
+  /**
+   * The operators a predicate leaf can carry. There are no negated
+   * operators: to get the opposite of one, construct an expression with
+   * a not operator around the leaf.
+   */
+  enum Operator {
+    EQUALS,
+    NULL_SAFE_EQUALS,
+    LESS_THAN,
+    LESS_THAN_EQUALS,
+    IN,
+    BETWEEN,
+    IS_NULL
+  }
+
+  /**
+   * The possible types for sargs, each paired with the Java class that
+   * its literal values use.
+   */
+  enum Type {
+    INTEGER(Integer.class), // all of the integer types except long
+    LONG(Long.class),
+    FLOAT(Double.class),   // float and double
+    STRING(String.class),  // string, char, varchar
+    DATE(Date.class),
+    DECIMAL(HiveDecimalWritable.class),
+    TIMESTAMP(Timestamp.class),
+    BOOLEAN(Boolean.class);
+
+    private final Class valueClass;
+
+    Type(Class valueClass) {
+      this.valueClass = valueClass;
+    }
+
+    /**
+     * For all SARG leaves, the values must be the matching class.
+     * @return the value class
+     */
+    public Class getValueClass() {
+      return valueClass;
+    }
+  }
+
+  /**
+   * Get the operator for the leaf.
+   * @return the operator
+   */
+  Operator getOperator();
+
+  /**
+   * Get the type of the column and literal by the file format.
+   * @return the type
+   */
+  Type getType();
+
+  /**
+   * Get the simple column name.
+   * @return the column name
+   */
+  String getColumnName();
+
+  /**
+   * Get the literal half of the predicate leaf. Adapt the original type for
+   * what orc needs.
+   *
+   * @return the literal; per {@link Type#getValueClass()} it is expected to
+   *   be an instance of the value class of {@link #getType()}
+   */
+  Object getLiteral();
+
+  /**
+   * For operators with multiple literals (IN and BETWEEN), get the literals.
+   *
+   * @return the list of literals; per {@link Type#getValueClass()} each is
+   *   expected to be an instance of the value class of {@link #getType()}
+   */
+  List<Object> getLiteralList();
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
new file mode 100644
index 0000000..d70b3b0
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.List;
+
+/**
+ * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable">
+ * SearchArgument</a>, which are the subset of predicates
+ * that can be pushed down to the RecordReader. Each SearchArgument consists
+ * of a series of SearchClauses that must each be true for the row to be
+ * accepted by the filter.
+ *
+ * This requires that the filter be normalized into conjunctive normal form
+ * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
+ */
+public interface SearchArgument {
+
+ /**
+ * The potential result sets of logical operations.
+ */
+ public static enum TruthValue {
+ YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL;
+
+ /**
+ * Compute logical or between the two values.
+ * @param right the other argument or null
+ * @return the result
+ */
+ public TruthValue or(TruthValue right) {
+ if (right == null || right == this) {
+ return this;
+ }
+ if (right == YES || this == YES) {
+ return YES;
+ }
+ if (right == YES_NULL || this == YES_NULL) {
+ return YES_NULL;
+ }
+ if (right == NO) {
+ return this;
+ }
+ if (this == NO) {
+ return right;
+ }
+ if (this == NULL) {
+ if (right == NO_NULL) {
+ return NULL;
+ } else {
+ return YES_NULL;
+ }
+ }
+ if (right == NULL) {
+ if (this == NO_NULL) {
+ return NULL;
+ } else {
+ return YES_NULL;
+ }
+ }
+ return YES_NO_NULL;
+ }
+
+ /**
+ * Compute logical AND between the two values.
+ * @param right the other argument or null
+ * @return the result
+ */
+ public TruthValue and(TruthValue right) {
+ if (right == null || right == this) {
+ return this;
+ }
+ if (right == NO || this == NO) {
+ return NO;
+ }
+ if (right == NO_NULL || this == NO_NULL) {
+ return NO_NULL;
+ }
+ if (right == YES) {
+ return this;
+ }
+ if (this == YES) {
+ return right;
+ }
+ if (this == NULL) {
+ if (right == YES_NULL) {
+ return NULL;
+ } else {
+ return NO_NULL;
+ }
+ }
+ if (right == NULL) {
+ if (this == YES_NULL) {
+ return NULL;
+ } else {
+ return NO_NULL;
+ }
+ }
+ return YES_NO_NULL;
+ }
+
+ public TruthValue not() {
+ switch (this) {
+ case NO:
+ return YES;
+ case YES:
+ return NO;
+ case NULL:
+ case YES_NO:
+ case YES_NO_NULL:
+ return this;
+ case NO_NULL:
+ return YES_NULL;
+ case YES_NULL:
+ return NO_NULL;
+ default:
+ throw new IllegalArgumentException("Unknown value: " + this);
+ }
+ }
+
+ /**
+ * Does the RecordReader need to include this set of records?
+ * @return true unless none of the rows qualify
+ */
+ public boolean isNeeded() {
+ switch (this) {
+ case NO:
+ case NULL:
+ case NO_NULL:
+ return false;
+ default:
+ return true;
+ }
+ }
+ }
+
+ /**
+ * Get the leaf predicates that are required to evaluate the predicate. The
+ * list will have the duplicates removed.
+ * @return the list of leaf predicates
+ */
+ public List<PredicateLeaf> getLeaves();
+
+ /**
+ * Get the expression tree. This should only needed for file formats that
+ * need to translate the expression to an internal form.
+ */
+ public ExpressionTree getExpression();
+
+ /**
+ * Evaluate the entire predicate based on the values for the leaf predicates.
+ * @param leaves the value of each leaf predicate
+ * @return the value of hte entire predicate
+ */
+ public TruthValue evaluate(TruthValue[] leaves);
+
+ /**
+ * A builder object for contexts outside of Hive where it isn't easy to
+ * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot
+ * before adding any leaves.
+ */
+ public interface Builder {
+
+ /**
+ * Start building an or operation and push it on the stack.
+ * @return this
+ */
+ public Builder startOr();
+
+ /**
+ * Start building an and operation and push it on the stack.
+ * @return this
+ */
+ public Builder startAnd();
+
+ /**
+ * Start building a not operation and push it on the stack.
+ * @return this
+ */
+ public Builder startNot();
+
+ /**
+ * Finish the current operation and pop it off of the stack. Each start
+ * call must have a matching end.
+ * @return this
+ */
+ public Builder end();
+
+ /**
+ * Add a less than leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param literal the literal
+ * @return this
+ */
+ public Builder lessThan(String column, PredicateLeaf.Type type,
+ Object literal);
+
+ /**
+ * Add a less than equals leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param literal the literal
+ * @return this
+ */
+ public Builder lessThanEquals(String column, PredicateLeaf.Type type,
+ Object literal);
+
+ /**
+ * Add an equals leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param literal the literal
+ * @return this
+ */
+ public Builder equals(String column, PredicateLeaf.Type type,
+ Object literal);
+
+ /**
+ * Add a null safe equals leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param literal the literal
+ * @return this
+ */
+ public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
+ Object literal);
+
+ /**
+ * Add an in leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param literal the literal
+ * @return this
+ */
+ public Builder in(String column, PredicateLeaf.Type type,
+ Object... literal);
+
+ /**
+ * Add an is null leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @return this
+ */
+ public Builder isNull(String column, PredicateLeaf.Type type);
+
+ /**
+ * Add a between leaf to the current item on the stack.
+ * @param column the name of the column
+ * @param type the type of the expression
+ * @param lower the literal
+ * @param upper the literal
+ * @return this
+ */
+ public Builder between(String column, PredicateLeaf.Type type,
+ Object lower, Object upper);
+
+ /**
+ * Add a truth value to the expression.
+ * @param truth
+ * @return this
+ */
+ public Builder literal(TruthValue truth);
+
+ /**
+ * Build and return the SearchArgument that has been defined. All of the
+ * starts must have been ended before this call.
+ * @return the new SearchArgument
+ */
+ public SearchArgument build();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
new file mode 100644
index 0000000..0778935
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+/**
+ * A factory for creating SearchArguments.
+ */
+public class SearchArgumentFactory {
+ public static SearchArgument.Builder newBuilder() {
+ return new SearchArgumentImpl.BuilderImpl();
+ }
+}