You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/05 21:15:48 UTC
svn commit: r1622763 [13/17] - in /hive/branches/tez: ./
ant/src/org/apache/hadoop/hive/ant/
hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/
hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/
itests/hive-unit/ itests/hive-u...
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Fri Sep 5 19:15:44 2014
@@ -57,6 +57,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.model.MRoleMap;
import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege;
import org.apache.hadoop.hive.metastore.model.MTablePrivilege;
+import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.thrift.TException;
public interface RawStore extends Configurable {
@@ -130,6 +131,9 @@ public interface RawStore extends Config
public abstract boolean addPartitions(String dbName, String tblName, List<Partition> parts)
throws InvalidObjectException, MetaException;
+ public abstract boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists)
+ throws InvalidObjectException, MetaException;
+
public abstract Partition getPartition(String dbName, String tableName,
List<String> part_vals) throws MetaException, NoSuchObjectException;
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Fri Sep 5 19:15:44 2014
@@ -507,8 +507,18 @@ public class Warehouse {
*/
public FileStatus[] getFileStatusesForSD(StorageDescriptor desc)
throws MetaException {
+ return getFileStatusesForLocation(desc.getLocation());
+ }
+
+ /**
+ * @param location location string from which the Path to scan is built
+ * @return array of FileStatus objects corresponding to the files
+ * found under the passed location
+ */
+ public FileStatus[] getFileStatusesForLocation(String location)
+ throws MetaException {
try {
- Path path = new Path(desc.getLocation());
+ Path path = new Path(location);
FileSystem fileSys = path.getFileSystem(conf);
return HiveStatsUtils.getFileStatusRecurse(path, -1, fileSys);
} catch (IOException ioe) {
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java Fri Sep 5 19:15:44 2014
@@ -21,19 +21,23 @@ package org.apache.hadoop.hive.metastore
import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.List;
public class AddPartitionEvent extends ListenerEvent {
private final Table table;
private final List<Partition> partitions;
+ private PartitionSpecProxy partitionSpecProxy;
public AddPartitionEvent(Table table, List<Partition> partitions, boolean status, HMSHandler handler) {
super(status, handler);
this.table = table;
this.partitions = partitions;
+ this.partitionSpecProxy = null;
}
public AddPartitionEvent(Table table, Partition partition, boolean status, HMSHandler handler) {
@@ -41,6 +45,16 @@ public class AddPartitionEvent extends L
}
/**
+ * Alternative constructor to use PartitionSpec APIs.
+ */
+ public AddPartitionEvent(Table table, PartitionSpecProxy partitionSpec, boolean status, HMSHandler handler) {
+ super(status, handler);
+ this.table = table;
+ this.partitions = null;
+ this.partitionSpecProxy = partitionSpec;
+ }
+
+ /**
* @return The table.
*/
public Table getTable() {
@@ -54,4 +68,11 @@ public class AddPartitionEvent extends L
return partitions;
}
+ /**
+ * @return Iterator for partitions, or null if this event was not created from a PartitionSpecProxy.
+ */
+ public Iterator<Partition> getPartitionIterator() {
+ return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator();
+ }
+
}
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java Fri Sep 5 19:15:44 2014
@@ -21,19 +21,23 @@ package org.apache.hadoop.hive.metastore
import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.List;
public class PreAddPartitionEvent extends PreEventContext {
private final Table table;
private final List<Partition> partitions;
+ private PartitionSpecProxy partitionSpecProxy;
public PreAddPartitionEvent (Table table, List<Partition> partitions, HMSHandler handler) {
super(PreEventType.ADD_PARTITION, handler);
this.table = table;
this.partitions = partitions;
+ this.partitionSpecProxy = null;
}
public PreAddPartitionEvent(Table table, Partition partition, HMSHandler handler) {
@@ -41,6 +45,14 @@ public class PreAddPartitionEvent extend
}
/**
+ * Alternative constructor, using the PartitionSpec APIs.
+ */
+ public PreAddPartitionEvent(Table table, PartitionSpecProxy partitionSpecProxy, HMSHandler handler) {
+ this(table, (List<Partition>)null, handler);
+ this.partitionSpecProxy = partitionSpecProxy;
+ }
+
+ /**
* @return the partitions
*/
public List<Partition> getPartitions() {
@@ -53,4 +65,11 @@ public class PreAddPartitionEvent extend
public Table getTable() {
return table ;
}
+
+ /**
+ * @return Iterator over partition-list, or null if this event was not created from a PartitionSpecProxy.
+ */
+ public Iterator<Partition> getPartitionIterator() {
+ return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator();
+ }
}
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java Fri Sep 5 19:15:44 2014
@@ -34,9 +34,17 @@ public class CompactionInfo {
private String fullPartitionName = null;
private String fullTableName = null;
+ public CompactionInfo(String dbname, String tableName, String partName, CompactionType type) {
+ this.dbname = dbname;
+ this.tableName = tableName;
+ this.partName = partName;
+ this.type = type;
+ }
+ CompactionInfo() {}
+
public String getFullPartitionName() {
if (fullPartitionName == null) {
- StringBuffer buf = new StringBuffer(dbname);
+ StringBuilder buf = new StringBuilder(dbname);
buf.append('.');
buf.append(tableName);
if (partName != null) {
@@ -50,11 +58,14 @@ public class CompactionInfo {
public String getFullTableName() {
if (fullTableName == null) {
- StringBuffer buf = new StringBuffer(dbname);
+ StringBuilder buf = new StringBuilder(dbname);
buf.append('.');
buf.append(tableName);
fullTableName = buf.toString();
}
return fullTableName;
}
+ public boolean isMajorCompaction() {
+ return CompactionType.MAJOR == type;
+ }
}
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java Fri Sep 5 19:15:44 2014
@@ -535,6 +535,46 @@ public class CompactionTxnHandler extend
deadlockCnt = 0;
}
}
+
+ /**
+ * Queries metastore DB directly to find columns in the table which have statistics information.
+ * If {@code ci} includes partition info then per partition stats info is examined, otherwise
+ * table level stats are examined.
+ * @throws MetaException
+ */
+ public List<String> findColumnsWithStats(CompactionInfo ci) throws MetaException {
+ Connection dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
+ Statement stmt = null;
+ ResultSet rs = null;
+ try {
+ stmt = dbConn.createStatement();
+ String s = "SELECT COLUMN_NAME FROM " + (ci.partName == null ? "TAB_COL_STATS" : "PART_COL_STATS")
+ + " WHERE DB_NAME='" + ci.dbname + "' AND TABLE_NAME='" + ci.tableName + "'"
+ + (ci.partName == null ? "" : " AND PARTITION_NAME='" + ci.partName + "'");
+ LOG.debug("Going to execute <" + s + ">");
+ rs = stmt.executeQuery(s);
+ List<String> columns = new ArrayList<String>();
+ while(rs.next()) {
+ columns.add(rs.getString(1));
+ }
+ LOG.debug("Found columns to update stats: " + columns + " on " + ci.tableName +
+ (ci.partName == null ? "" : "/" + ci.partName));
+ dbConn.commit();
+ return columns;
+ } catch (SQLException e) {
+ try {
+ LOG.error("Failed to find columns to analyze stats on for " + ci.tableName +
+ (ci.partName == null ? "" : "/" + ci.partName), e);
+ dbConn.rollback();
+ } catch (SQLException e1) {
+ //nothing we can do here
+ }
+ throw new MetaException("Unable to connect to transaction database " +
+ StringUtils.stringifyException(e));
+ } finally {
+ close(rs, stmt, dbConn);
+ }
+ }
}
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Fri Sep 5 19:15:44 2014
@@ -860,6 +860,29 @@ public class TxnHandler {
}
/**
+ * Close the ResultSet.
+ * @param rs may be {@code null}
+ */
+ void close(ResultSet rs) {
+ try {
+ if (rs != null && !rs.isClosed()) {
+ rs.close();
+ }
+ }
+ catch(SQLException ex) {
+ LOG.warn("Failed to close statement " + ex.getMessage());
+ }
+ }
+
+ /**
+ * Close all 3 JDBC artifacts in order: {@code rs stmt dbConn}
+ */
+ void close(ResultSet rs, Statement stmt, Connection dbConn) {
+ close(rs);
+ closeStmt(stmt);
+ closeDbConn(dbConn);
+ }
+ /**
* Determine if an exception was a deadlock. Unfortunately there is no standard way to do
* this, so we have to inspect the error messages and catch the telltale signs for each
* different database.
Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Fri Sep 5 19:15:44 2014
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.model.MRoleMap;
import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege;
import org.apache.hadoop.hive.metastore.model.MTablePrivilege;
+import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.thrift.TException;
/**
@@ -677,6 +678,11 @@ public class DummyRawStoreControlledComm
}
@Override
+ public boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists) throws InvalidObjectException, MetaException {
+ return false;
+ }
+
+ @Override
public void dropPartitions(String dbName, String tblName, List<String> partNames)
throws MetaException, NoSuchObjectException {
objectStore.dropPartitions(dbName, tblName, partNames);
Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Fri Sep 5 19:15:44 2014
@@ -57,6 +57,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.model.MRoleMap;
import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege;
import org.apache.hadoop.hive.metastore.model.MTablePrivilege;
+import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.thrift.TException;
/**
@@ -699,6 +700,11 @@ public class DummyRawStoreForJdoConnecti
}
@Override
+ public boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists) throws InvalidObjectException, MetaException {
+ return false;
+ }
+
+ @Override
public void dropPartitions(String dbName, String tblName, List<String> partNames) {
}
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt Fri Sep 5 19:15:44 2014
@@ -163,8 +163,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.PROJECTION)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt Fri Sep 5 19:15:44 2014
@@ -155,8 +155,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.PROJECTION)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt Fri Sep 5 19:15:44 2014
@@ -128,7 +128,7 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.PROJECTION)
.setNumArguments(1)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
}
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt Fri Sep 5 19:15:44 2014
@@ -188,8 +188,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.FILTER)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt Fri Sep 5 19:15:44 2014
@@ -164,8 +164,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.FILTER)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt Fri Sep 5 19:15:44 2014
@@ -164,8 +164,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.FILTER)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.SCALAR,
VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt Fri Sep 5 19:15:44 2014
@@ -184,9 +184,9 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.FILTER)
.setNumArguments(3)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("string"),
- VectorExpressionDescriptor.ArgumentType.getType("string"),
- VectorExpressionDescriptor.ArgumentType.getType("string"))
+ VectorExpressionDescriptor.ArgumentType.STRING,
+ VectorExpressionDescriptor.ArgumentType.STRING,
+ VectorExpressionDescriptor.ArgumentType.STRING)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR,
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt Fri Sep 5 19:15:44 2014
@@ -176,8 +176,8 @@ public class <ClassName> extends VectorE
.setNumArguments(3)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.COLUMN,
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt Fri Sep 5 19:15:44 2014
@@ -167,8 +167,8 @@ public class <ClassName> extends VectorE
.setNumArguments(3)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.COLUMN,
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt Fri Sep 5 19:15:44 2014
@@ -169,8 +169,8 @@ public class <ClassName> extends VectorE
.setNumArguments(3)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR,
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt Fri Sep 5 19:15:44 2014
@@ -154,8 +154,8 @@ public class <ClassName> extends VectorE
.setNumArguments(3)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR,
Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt (original)
+++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt Fri Sep 5 19:15:44 2014
@@ -155,8 +155,8 @@ public class <ClassName> extends VectorE
VectorExpressionDescriptor.Mode.PROJECTION)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
- VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"))
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.SCALAR,
VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Fri Sep 5 19:15:44 2014
@@ -135,7 +135,6 @@ public class Driver implements CommandPr
private String errorMessage;
private String SQLState;
private Throwable downstreamError;
- private HiveTxnManager txnMgr;
// A limit on the number of threads that can be launched
private int maxthreads;
@@ -145,16 +144,6 @@ public class Driver implements CommandPr
private String userName;
- private void createTxnManager() throws SemanticException {
- if (txnMgr == null) {
- try {
- txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
- } catch (LockException e) {
- throw new SemanticException(e.getMessage(), e);
- }
- }
- }
-
private boolean checkConcurrency() throws SemanticException {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
if (!supportConcurrency) {
@@ -868,7 +857,7 @@ public class Driver implements CommandPr
// the input format.
private int recordValidTxns() {
try {
- ValidTxnList txns = txnMgr.getValidTxns();
+ ValidTxnList txns = SessionState.get().getTxnMgr().getValidTxns();
conf.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
return 0;
} catch (LockException e) {
@@ -893,7 +882,7 @@ public class Driver implements CommandPr
try {
- txnMgr.acquireLocks(plan, ctx, userName);
+ SessionState.get().getTxnMgr().acquireLocks(plan, ctx, userName);
return 0;
} catch (LockException e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
@@ -917,7 +906,7 @@ public class Driver implements CommandPr
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
if (hiveLocks != null) {
- ctx.getHiveTxnManager().getLockManager().releaseLocks(hiveLocks);
+ SessionState.get().getTxnMgr().getLockManager().releaseLocks(hiveLocks);
}
ctx.setHiveLocks(null);
@@ -1048,9 +1037,14 @@ public class Driver implements CommandPr
boolean requireLock = false;
boolean ckLock = false;
+ SessionState ss = SessionState.get();
try {
ckLock = checkConcurrency();
- createTxnManager();
+ try {
+ ss.initTxnMgr(conf);
+ } catch (LockException e) {
+ throw new SemanticException(e.getMessage(), e);
+ }
} catch (SemanticException e) {
errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
@@ -1074,7 +1068,7 @@ public class Driver implements CommandPr
// the reason that we set the txn manager for the cxt here is because each
// query has its own ctx object. The txn mgr is shared across the
// same instance of Driver, which can run multiple queries.
- ctx.setHiveTxnManager(txnMgr);
+ ctx.setHiveTxnManager(ss.getTxnMgr());
if (ckLock) {
boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
@@ -1670,9 +1664,6 @@ public class Driver implements CommandPr
e.getMessage());
}
}
- if (txnMgr != null) {
- txnMgr.closeTxnManager();
- }
}
public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java Fri Sep 5 19:15:44 2014
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hive.common.util.AnnotationUtils;
@@ -28,15 +30,46 @@ import org.apache.hive.common.util.Annot
*/
public class VectorExpressionDescriptor {
+ private static final Log LOG = LogFactory.getLog(
+ VectorExpressionDescriptor.class.getName());
+
final static int MAX_NUM_ARGUMENTS = 3;
+ //
+ // Special handling is needed at times for DATE, TIMESTAMP, (STRING), CHAR, and VARCHAR so they can
+ // be named specifically as argument types.
+ //
+ // LongColumnVector -->
+ // INT_FAMILY
+ // DATE
+ // TIMESTAMP
+ //
+ // DoubleColumnVector -->
+ // FLOAT_FAMILY
+ //
+ // DecimalColumnVector -->
+ // DECIMAL
+ //
+ // BytesColumnVector -->
+ // STRING
+ // CHAR
+ // VARCHAR
+ //
public enum ArgumentType {
- NONE(0),
- LONG(1),
- DOUBLE(2),
- STRING(3),
- DECIMAL(4),
- ANY(7);
+ NONE (0x000),
+ INT_FAMILY (0x001),
+ FLOAT_FAMILY (0x002),
+ DECIMAL (0x004),
+ STRING (0x008),
+ CHAR (0x010),
+ VARCHAR (0x020),
+ STRING_FAMILY (STRING.value | CHAR.value | VARCHAR.value),
+ DATE (0x040),
+ TIMESTAMP (0x080),
+ DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
+ INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value),
+ STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value),
+ ALL_FAMILY (0xFFF);
private final int value;
@@ -48,12 +81,79 @@ public class VectorExpressionDescriptor
return value;
}
+ public static ArgumentType fromHiveTypeName(String hiveTypeName) {
+ String lower = hiveTypeName.toLowerCase();
+ if (lower.equals("tinyint") ||
+ lower.equals("smallint") ||
+ lower.equals("int") ||
+ lower.equals("bigint") ||
+ lower.equals("boolean") ||
+ lower.equals("long")) {
+ return INT_FAMILY;
+ } else if (lower.equals("double") || lower.equals("float")) {
+ return FLOAT_FAMILY;
+ } else if (lower.equals("string")) {
+ return STRING;
+ } else if (VectorizationContext.charTypePattern.matcher(lower).matches()) {
+ return CHAR;
+ } else if (VectorizationContext.varcharTypePattern.matcher(lower).matches()) {
+ return VARCHAR;
+ } else if (VectorizationContext.decimalTypePattern.matcher(lower).matches()) {
+ return DECIMAL;
+ } else if (lower.equals("timestamp")) {
+ return TIMESTAMP;
+ } else if (lower.equals("date")) {
+ return DATE;
+ } else if (lower.equals("void")) {
+ // The old code let void through...
+ return INT_FAMILY;
+ } else {
+ return NONE;
+ }
+ }
+
public static ArgumentType getType(String inType) {
- String type = VectorizationContext.getNormalizedTypeName(inType);
- if (VectorizationContext.decimalTypePattern.matcher(type).matches()) {
- type = "decimal";
+ if (inType.equalsIgnoreCase("long")) {
+ // A synonym in some places in the code...
+ return INT_FAMILY;
+ } else if (inType.equalsIgnoreCase("double")) {
+ // A synonym in some places in the code...
+ return FLOAT_FAMILY;
+ } else if (VectorizationContext.decimalTypePattern.matcher(inType).matches()) {
+ return DECIMAL;
+ } else if (VectorizationContext.charTypePattern.matcher(inType).matches()) {
+ return CHAR;
+ } else if (VectorizationContext.varcharTypePattern.matcher(inType).matches()) {
+ return VARCHAR;
+ }
+ return valueOf(inType.toUpperCase());
+ }
+
+ public boolean isSameTypeOrFamily(ArgumentType other) {
+ return ((value & other.value) != 0);
+ }
+
+ public static String getVectorColumnSimpleName(ArgumentType argType) {
+ if (argType == INT_FAMILY ||
+ argType == DATE ||
+ argType == TIMESTAMP) {
+ return "Long";
+ } else if (argType == FLOAT_FAMILY) {
+ return "Double";
+ } else if (argType == DECIMAL) {
+ return "Decimal";
+ } else if (argType == STRING ||
+ argType == CHAR ||
+ argType == VARCHAR) {
+ return "String";
+ } else {
+ return "None";
}
- return valueOf(type.toUpperCase());
+ }
+
+ public static String getVectorColumnSimpleName(String hiveTypeName) {
+ ArgumentType argType = fromHiveTypeName(hiveTypeName);
+ return getVectorColumnSimpleName(argType);
}
}
@@ -162,15 +262,12 @@ public class VectorExpressionDescriptor
*/
public static final class Descriptor {
- @Override
- public boolean equals(Object o) {
- Descriptor other = (Descriptor) o;
+ public boolean matches(Descriptor other) {
if (!mode.equals(other.mode) || (argCount != other.argCount) ) {
return false;
}
for (int i = 0; i < argCount; i++) {
- if (!argTypes[i].equals(other.argTypes[i]) && (!argTypes[i].equals(ArgumentType.ANY) &&
- !other.argTypes[i].equals(ArgumentType.ANY))) {
+ if (!argTypes[i].isSameTypeOrFamily(other.argTypes[i])) {
return false;
}
if (!exprTypes[i].equals(other.exprTypes[i])) {
@@ -228,13 +325,23 @@ public class VectorExpressionDescriptor
Class<? extends VectorExpression>[] list = annotation.value();
for (Class<? extends VectorExpression> ve : list) {
try {
- if (ve.newInstance().getDescriptor().equals(descriptor)) {
+ if (ve.newInstance().getDescriptor().matches(descriptor)) {
return ve;
}
} catch (Exception ex) {
throw new HiveException(ex);
}
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString());
+ for (Class<? extends VectorExpression> ve : list) {
+ try {
+ LOG.debug("getVectorExpressionClass doesn't match " + ve.getSimpleName() + " " + ve.newInstance().getDescriptor().toString());
+ } catch (Exception ex) {
+ throw new HiveException(ex);
+ }
+ }
+ }
return null;
}
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Sep 5 19:15:44 2014
@@ -33,7 +33,9 @@ import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
@@ -97,11 +99,13 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.util.StringUtils;
/**
* Context class for vectorization execution.
@@ -123,6 +127,15 @@ public class VectorizationContext {
public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
Pattern.CASE_INSENSITIVE);
+ public static final Pattern charTypePattern = Pattern.compile("char.*",
+ Pattern.CASE_INSENSITIVE);
+
+ public static final Pattern varcharTypePattern = Pattern.compile("varchar.*",
+ Pattern.CASE_INSENSITIVE);
+
+ public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*",
+ Pattern.CASE_INSENSITIVE);
+
//Map column number to type
private final OutputColumnManager ocm;
@@ -210,14 +223,17 @@ public class VectorizationContext {
private final Set<Integer> usedOutputColumns = new HashSet<Integer>();
- int allocateOutputColumn(String columnType) {
- if (initialOutputCol < 0) {
- // This is a test
- return 0;
+ int allocateOutputColumn(String hiveTypeName) {
+ if (initialOutputCol < 0) {
+ // This is a test
+ return 0;
+ }
+
+ // We need to differentiate DECIMAL columns by their precision and scale...
+ String normalizedTypeName = getNormalizedName(hiveTypeName);
+ int relativeCol = allocateOutputColumnInternal(normalizedTypeName);
+ return initialOutputCol + relativeCol;
}
- int relativeCol = allocateOutputColumnInternal(columnType);
- return initialOutputCol + relativeCol;
- }
private int allocateOutputColumnInternal(String columnType) {
for (int i = 0; i < outputColCount; i++) {
@@ -548,6 +564,12 @@ public class VectorizationContext {
case STRING:
udfClass = new UDFToString();
break;
+ case CHAR:
+ genericUdf = new GenericUDFToChar();
+ break;
+ case VARCHAR:
+ genericUdf = new GenericUDFToVarchar();
+ break;
case BOOLEAN:
udfClass = new UDFToBoolean();
break;
@@ -592,15 +614,15 @@ public class VectorizationContext {
Class<? extends UDF> udfClass = bridge.getUdfClass();
if (udfClass.equals(UDFHex.class)
|| udfClass.equals(UDFConv.class)
- || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string")
- || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string")
+ || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
+ || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
|| udfClass.equals(UDFToString.class) &&
(arg0Type(expr).equals("timestamp")
|| arg0Type(expr).equals("double")
|| arg0Type(expr).equals("float"))) {
return true;
}
- } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string"))
+ } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr)))
/* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
* of their complexity and generality. In the future, variations of these
@@ -615,6 +637,16 @@ public class VectorizationContext {
|| gudf instanceof GenericUDFCase
|| gudf instanceof GenericUDFWhen) {
return true;
+ } else if (gudf instanceof GenericUDFToChar &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
+ } else if (gudf instanceof GenericUDFToVarchar &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
}
return false;
}
@@ -721,27 +753,21 @@ public class VectorizationContext {
private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo,
Mode mode) throws HiveException {
- String type = typeInfo.getTypeName();
- String colVectorType = getNormalizedTypeName(type);
+ String typeName = typeInfo.getTypeName();
+ VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName);
+ if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) {
+ throw new HiveException("No vector argument type for type name " + typeName);
+ }
int outCol = -1;
if (mode == Mode.PROJECTION) {
- outCol = ocm.allocateOutputColumn(colVectorType);
+ outCol = ocm.allocateOutputColumn(typeName);
}
if (constantValue == null) {
- return new ConstantVectorExpression(outCol, type, true);
- } else if (decimalTypePattern.matcher(type).matches()) {
- VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue);
- ve.setOutputType(typeInfo.getTypeName());
- return ve;
- } else if (type.equalsIgnoreCase("long") || type.equalsIgnoreCase("int") ||
- type.equalsIgnoreCase("short") || type.equalsIgnoreCase("byte")) {
- return new ConstantVectorExpression(outCol,
- ((Number) constantValue).longValue());
- } else if (type.equalsIgnoreCase("double") || type.equalsIgnoreCase("float")) {
- return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue());
- } else if (type.equalsIgnoreCase("string")) {
- return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes());
- } else if (type.equalsIgnoreCase("boolean")) {
+ return new ConstantVectorExpression(outCol, typeName, true);
+ }
+
+ // Boolean is special case.
+ if (typeName.equalsIgnoreCase("boolean")) {
if (mode == Mode.FILTER) {
if (((Boolean) constantValue).booleanValue()) {
return new FilterConstantBooleanVectorExpression(1);
@@ -756,7 +782,26 @@ public class VectorizationContext {
}
}
}
- throw new HiveException("Unsupported constant type: "+type.toString());
+
+ switch (vectorArgType) {
+ case INT_FAMILY:
+ return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue());
+ case FLOAT_FAMILY:
+ return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue());
+ case DECIMAL:
+ VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue);
+ // Set type name with decimal precision, scale, etc.
+ ve.setOutputType(typeName);
+ return ve;
+ case STRING:
+ return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes());
+ case CHAR:
+ return new ConstantVectorExpression(outCol, ((HiveChar) constantValue));
+ case VARCHAR:
+ return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue));
+ default:
+ throw new HiveException("Unsupported constant type: " + typeName);
+ }
}
/**
@@ -799,7 +844,15 @@ public class VectorizationContext {
builder.setMode(mode);
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
- builder.setArgumentType(i, child.getTypeString());
+ String childTypeString = child.getTypeString();
+ if (childTypeString == null) {
+ throw new HiveException("Null child type name string");
+ }
+ String undecoratedTypeName = getUndecoratedName(childTypeString);
+ if (undecoratedTypeName == null) {
+ throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method");
+ }
+ builder.setArgumentType(i, undecoratedTypeName);
if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) {
builder.setInputExpressionType(i, InputExpressionType.COLUMN);
} else if (child instanceof ExprNodeConstantDesc) {
@@ -829,7 +882,11 @@ public class VectorizationContext {
try {
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
- inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
+ String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName());
+ inputTypes[i] = VectorExpression.Type.getValue(undecoratedName);
+ if (inputTypes[i] == VectorExpression.Type.OTHER){
+ throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName);
+ }
if (child instanceof ExprNodeGenericFuncDesc) {
VectorExpression vChild = getVectorExpression(child, childrenMode);
children.add(vChild);
@@ -870,36 +927,71 @@ public class VectorizationContext {
return Mode.PROJECTION;
}
+ private String getNewInstanceArgumentString(Object [] args) {
+ if (args == null) {
+ return "arguments: NULL";
+ }
+ ArrayList<String> argClasses = new ArrayList<String>();
+ for (Object obj : args) {
+ argClasses.add(obj.getClass().getSimpleName());
+ }
+ return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString();
+ }
+
private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object...args)
throws HiveException {
VectorExpression ve = null;
Constructor<?> ctor = getConstructor(vclass);
int numParams = ctor.getParameterTypes().length;
int argsLength = (args == null) ? 0 : args.length;
- try {
- if (numParams == 0) {
+ if (numParams == 0) {
+ try {
ve = (VectorExpression) ctor.newInstance();
- } else if (numParams == argsLength) {
+ } catch (Exception ex) {
+ throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " +
+ StringUtils.stringifyException(ex));
+ }
+ } else if (numParams == argsLength) {
+ try {
ve = (VectorExpression) ctor.newInstance(args);
- } else if (numParams == argsLength + 1) {
- // Additional argument is needed, which is the outputcolumn.
+ } catch (Exception ex) {
+ throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " +
+ StringUtils.stringifyException(ex));
+ }
+ } else if (numParams == argsLength + 1) {
+ // Additional argument is needed, which is the outputcolumn.
+ Object [] newArgs = null;
+ try {
String outType;
// Special handling for decimal because decimal types need scale and precision parameter.
// This special handling should be avoided by using returnType uniformly for all cases.
if (returnType != null) {
- outType = getNormalizedTypeName(returnType.getTypeName()).toLowerCase();
+ outType = getNormalizedName(returnType.getTypeName()).toLowerCase();
+ if (outType == null) {
+ throw new HiveException("No vector type for type name " + returnType);
+ }
} else {
outType = ((VectorExpression) vclass.newInstance()).getOutputType();
}
int outputCol = ocm.allocateOutputColumn(outType);
- Object [] newArgs = Arrays.copyOf(args, numParams);
+ newArgs = Arrays.copyOf(args, numParams);
newArgs[numParams-1] = outputCol;
+
ve = (VectorExpression) ctor.newInstance(newArgs);
ve.setOutputType(outType);
+ } catch (Exception ex) {
+ throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " +
+ StringUtils.stringifyException(ex));
+ }
+ }
+ // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
+ if (ve instanceof TruncStringOutput) {
+ TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
+ if (returnType instanceof BaseCharTypeInfo) {
+ BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType;
+ truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
}
- } catch (Exception ex) {
- throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex);
}
return ve;
}
@@ -933,6 +1025,10 @@ public class VectorizationContext {
}
} else if (udf instanceof GenericUDFToDecimal) {
return getCastToDecimal(childExpr, returnType);
+ } else if (udf instanceof GenericUDFToChar) {
+ return getCastToChar(childExpr, returnType);
+ } else if (udf instanceof GenericUDFToVarchar) {
+ return getCastToVarChar(childExpr, returnType);
}
// Now do a general lookup
@@ -962,7 +1058,7 @@ public class VectorizationContext {
inputColumns[i++] = ve.getOutputColumn();
}
- int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
+ int outColumn = ocm.allocateOutputColumn(returnType.getTypeName());
VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn);
vectorCoalesce.setOutputType(returnType.getTypeName());
vectorCoalesce.setChildExpressions(vectorChildren);
@@ -989,7 +1085,7 @@ public class VectorizationContext {
inputColumns[i++] = ve.getOutputColumn();
}
- int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
+ int outColumn = ocm.allocateOutputColumn(returnType.getTypeName());
VectorElt vectorElt = new VectorElt(inputColumns, outColumn);
vectorElt.setOutputType(returnType.getTypeName());
vectorElt.setChildExpressions(vectorChildren);
@@ -1265,6 +1361,64 @@ public class VectorizationContext {
throw new HiveException("Unhandled cast input type: " + inputType);
}
+ private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
+ throws HiveException {
+ ExprNodeDesc child = childExpr.get(0);
+ String inputType = childExpr.get(0).getTypeString();
+ if (child instanceof ExprNodeConstantDesc) {
+ // Don't do constant folding here. Wait until the optimizer is changed to do it.
+ // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
+ return null;
+ }
+ if (inputType.equals("boolean")) {
+ // Boolean must come before the integer family. It's a special case.
+ return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, Mode.PROJECTION, null);
+ } else if (isIntFamily(inputType)) {
+ return createVectorExpression(CastLongToChar.class, childExpr, Mode.PROJECTION, null);
+ } else if (isDecimalFamily(inputType)) {
+ return createVectorExpression(CastDecimalToChar.class, childExpr, Mode.PROJECTION, returnType);
+ } else if (isDateFamily(inputType)) {
+ return createVectorExpression(CastDateToChar.class, childExpr, Mode.PROJECTION, returnType);
+ } else if (isStringFamily(inputType)) {
+ return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType);
+ }
+
+ /*
+ * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
+ */
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
+ throws HiveException {
+ ExprNodeDesc child = childExpr.get(0);
+ String inputType = childExpr.get(0).getTypeString();
+ if (child instanceof ExprNodeConstantDesc) {
+ // Don't do constant folding here. Wait until the optimizer is changed to do it.
+ // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
+ return null;
+ }
+ if (inputType.equals("boolean")) {
+ // Boolean must come before the integer family. It's a special case.
+ return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, Mode.PROJECTION, null);
+ } else if (isIntFamily(inputType)) {
+ return createVectorExpression(CastLongToVarChar.class, childExpr, Mode.PROJECTION, null);
+ } else if (isDecimalFamily(inputType)) {
+ return createVectorExpression(CastDecimalToVarChar.class, childExpr, Mode.PROJECTION, returnType);
+ } else if (isDateFamily(inputType)) {
+ return createVectorExpression(CastDateToVarChar.class, childExpr, Mode.PROJECTION, returnType);
+ } else if (isStringFamily(inputType)) {
+ return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType);
+ }
+
+ /*
+ * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
+ */
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr,
TypeInfo returnType) throws HiveException {
ExprNodeDesc child = childExpr.get(0);
@@ -1304,12 +1458,12 @@ public class VectorizationContext {
return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION);
}
// Long and double are handled using descriptors, string needs to be specially handled.
- if (inputType.equals("string")) {
+ if (isStringFamily(inputType)) {
// string casts to false if it is 0 characters long, otherwise true
VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr,
Mode.PROJECTION, null);
- int outputCol = ocm.allocateOutputColumn("integer");
+ int outputCol = ocm.allocateOutputColumn("Long");
VectorExpression lenToBoolExpr =
new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol);
lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
@@ -1411,6 +1565,14 @@ public class VectorizationContext {
cl = FilterStringColumnBetween.class;
} else if (colType.equals("string") && notKeywordPresent) {
cl = FilterStringColumnNotBetween.class;
+ } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
+ cl = FilterVarCharColumnBetween.class;
+ } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
+ cl = FilterVarCharColumnNotBetween.class;
+ } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
+ cl = FilterCharColumnBetween.class;
+ } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
+ cl = FilterCharColumnNotBetween.class;
} else if (colType.equals("timestamp")) {
// Get timestamp boundary values as longs instead of the expected strings
@@ -1483,13 +1645,13 @@ public class VectorizationContext {
// Allocate output column and get column number;
int outputCol = -1;
- String resultType = expr.getTypeInfo().getTypeName();
- String resultColVectorType = getNormalizedTypeName(resultType);
+ String resultTypeName = expr.getTypeInfo().getTypeName();
- outputCol = ocm.allocateOutputColumn(resultColVectorType);
+ outputCol = ocm.allocateOutputColumn(resultTypeName);
// Make vectorized operator
- VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
+ String normalizedName = getNormalizedName(resultTypeName);
+ VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, normalizedName, argDescs);
// Set child expressions
VectorExpression[] childVEs = null;
@@ -1509,7 +1671,7 @@ public class VectorizationContext {
}
public static boolean isStringFamily(String resultType) {
- return resultType.equalsIgnoreCase("string");
+ return resultType.equalsIgnoreCase("string") || charVarcharTypePattern.matcher(resultType).matches();
}
public static boolean isDatetimeFamily(String resultType) {
@@ -1617,7 +1779,7 @@ public class VectorizationContext {
"Non-constant argument not supported for vectorization.");
}
ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
- if (constExpr.getTypeString().equals("string")) {
+ if (isStringFamily(constExpr.getTypeString())) {
// create expression tree with type cast from string to timestamp
ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc();
@@ -1667,63 +1829,99 @@ public class VectorizationContext {
}
}
- static String getNormalizedTypeName(String colType){
- String normalizedType = null;
- if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) {
- normalizedType = "Double";
- } else if (colType.equalsIgnoreCase("String")) {
- normalizedType = "String";
- } else if (decimalTypePattern.matcher(colType).matches()) {
+ static String getNormalizedName(String hiveTypeName) {
+ VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
+ switch (argType) {
+ case INT_FAMILY:
+ return "Long";
+ case FLOAT_FAMILY:
+ return "Double";
+ case DECIMAL:
//Return the decimal type as is, it includes scale and precision.
- normalizedType = colType;
- } else {
- normalizedType = "Long";
+ return hiveTypeName;
+ case STRING:
+ return "String";
+ case CHAR:
+ //Return the CHAR type as is, it includes maximum length
+ return hiveTypeName;
+ case VARCHAR:
+ //Return the VARCHAR type as is, it includes maximum length.
+ return hiveTypeName;
+ case DATE:
+ return "Date";
+ case TIMESTAMP:
+ return "Timestamp";
+ default:
+ return "None";
+ }
+ }
+
+ static String getUndecoratedName(String hiveTypeName) {
+ VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
+ switch (argType) {
+ case INT_FAMILY:
+ return "Long";
+ case FLOAT_FAMILY:
+ return "Double";
+ case DECIMAL:
+ return "Decimal";
+ case STRING:
+ return "String";
+ case CHAR:
+ return "Char";
+ case VARCHAR:
+ return "VarChar";
+ case DATE:
+ return "Date";
+ case TIMESTAMP:
+ return "Timestamp";
+ default:
+ return "None";
}
- return normalizedType;
}
- static Object[][] aggregatesDefinition = {
- {"min", "Long", VectorUDAFMinLong.class},
- {"min", "Double", VectorUDAFMinDouble.class},
- {"min", "String", VectorUDAFMinString.class},
- {"min", "Decimal",VectorUDAFMinDecimal.class},
- {"max", "Long", VectorUDAFMaxLong.class},
- {"max", "Double", VectorUDAFMaxDouble.class},
- {"max", "String", VectorUDAFMaxString.class},
- {"max", "Decimal",VectorUDAFMaxDecimal.class},
- {"count", null, VectorUDAFCountStar.class},
- {"count", "Long", VectorUDAFCount.class},
- {"count", "Double", VectorUDAFCount.class},
- {"count", "String", VectorUDAFCount.class},
- {"count", "Decimal",VectorUDAFCount.class},
- {"sum", "Long", VectorUDAFSumLong.class},
- {"sum", "Double", VectorUDAFSumDouble.class},
- {"sum", "Decimal",VectorUDAFSumDecimal.class},
- {"avg", "Long", VectorUDAFAvgLong.class},
- {"avg", "Double", VectorUDAFAvgDouble.class},
- {"avg", "Decimal",VectorUDAFAvgDecimal.class},
- {"variance", "Long", VectorUDAFVarPopLong.class},
- {"var_pop", "Long", VectorUDAFVarPopLong.class},
- {"variance", "Double", VectorUDAFVarPopDouble.class},
- {"var_pop", "Double", VectorUDAFVarPopDouble.class},
- {"variance", "Decimal",VectorUDAFVarPopDecimal.class},
- {"var_pop", "Decimal",VectorUDAFVarPopDecimal.class},
- {"var_samp", "Long", VectorUDAFVarSampLong.class},
- {"var_samp" , "Double", VectorUDAFVarSampDouble.class},
- {"var_samp" , "Decimal",VectorUDAFVarSampDecimal.class},
- {"std", "Long", VectorUDAFStdPopLong.class},
- {"stddev", "Long", VectorUDAFStdPopLong.class},
- {"stddev_pop","Long", VectorUDAFStdPopLong.class},
- {"std", "Double", VectorUDAFStdPopDouble.class},
- {"stddev", "Double", VectorUDAFStdPopDouble.class},
- {"stddev_pop","Double", VectorUDAFStdPopDouble.class},
- {"std", "Decimal",VectorUDAFStdPopDecimal.class},
- {"stddev", "Decimal",VectorUDAFStdPopDecimal.class},
- {"stddev_pop","Decimal",VectorUDAFStdPopDecimal.class},
- {"stddev_samp","Long", VectorUDAFStdSampLong.class},
- {"stddev_samp","Double",VectorUDAFStdSampDouble.class},
- {"stddev_samp","Decimal",VectorUDAFStdSampDecimal.class},
- };
+ static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFMinLong.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFMinDouble.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFMinString.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFMinDecimal.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFMaxLong.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFMaxDouble.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFMaxString.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFMaxDecimal.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, VectorUDAFCountStar.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFCount.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFSumLong.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFSumDouble.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFSumDecimal.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFAvgLong.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFAvgDouble.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFAvgDecimal.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarSampLong.class));
+ add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarSampDouble.class));
+ add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarSampDecimal.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdSampLong.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdSampDouble.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdSampDecimal.class));
+ }};
public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
throws HiveException {
@@ -1737,22 +1935,22 @@ public class VectorizationContext {
}
String aggregateName = desc.getGenericUDAFName();
- String inputType = null;
+ VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;
if (paramDescList.size() > 0) {
ExprNodeDesc inputExpr = paramDescList.get(0);
- inputType = getNormalizedTypeName(inputExpr.getTypeString());
- if (decimalTypePattern.matcher(inputType).matches()) {
- inputType = "Decimal";
+ inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
+ if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
+ throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
}
}
- for (Object[] aggDef : aggregatesDefinition) {
- if (aggregateName.equalsIgnoreCase((String) aggDef[0]) &&
- ((aggDef[1] == null && inputType == null) ||
- (aggDef[1] != null && aggDef[1].equals(inputType)))) {
- Class<? extends VectorAggregateExpression> aggClass =
- (Class<? extends VectorAggregateExpression>) (aggDef[2]);
+ for (AggregateDefinition aggDef : aggregatesDefinition) {
+ if (aggregateName.equalsIgnoreCase(aggDef.getName()) &&
+ ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE &&
+ inputType == VectorExpressionDescriptor.ArgumentType.NONE) ||
+ (aggDef.getType().isSameTypeOrFamily(inputType)))) {
+ Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
try
{
Constructor<? extends VectorAggregateExpression> ctor =
@@ -1769,7 +1967,7 @@ public class VectorizationContext {
}
throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
- "\" for type: \"" + inputType + "");
+ "\" for type: \"" + inputType.name() + "");
}
public Map<Integer, String> getOutputColumnTypeMap() {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Fri Sep 5 19:15:44 2014
@@ -23,11 +23,16 @@ import java.sql.Timestamp;
import java.util.LinkedList;
import java.util.List;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -126,6 +131,8 @@ public class VectorizedBatchUtil {
break;
case BINARY:
case STRING:
+ case CHAR:
+ case VARCHAR:
cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
break;
case DECIMAL:
@@ -375,6 +382,51 @@ public class VectorizedBatchUtil {
}
}
break;
+ case CHAR: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
+ byte[] bytes = colHiveChar.getStrippedValue().getBytes();
+
+ // We assume the CHAR maximum length was enforced when the object was created.
+ int length = bytes.length;
+
+ int start = buffer.getLength();
+ try {
+ // In vector mode, we store CHAR as unpadded.
+ buffer.write(bytes, 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
+ case VARCHAR: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
+ byte[] bytes = colHiveVarchar.getValue().getBytes();
+
+ // We assume the VARCHAR maximum length was enforced when the object was created.
+ int length = bytes.length;
+
+ int start = buffer.getLength();
+ try {
+ buffer.write(bytes, 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
case DECIMAL:
DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
if (writableCol != null) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Fri Sep 5 19:15:44 2014
@@ -158,7 +158,10 @@ public class VectorizedColumnarSerDe ext
serializeVectorStream.write(bytes, 0, bytes.length);
}
break;
- case STRING: {
+ case STRING:
+ case CHAR:
+ case VARCHAR: {
+ // Is it correct to escape CHAR and VARCHAR?
BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex],
bcv.start[rowIndex],
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Fri Sep 5 19:15:44 2014
@@ -278,7 +278,7 @@ public class VectorizedRowBatchCtx {
case PRIMITIVE: {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
// Vectorization currently only supports the following data types:
- // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP,
+ // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP,
// DATE and DECIMAL
switch (poi.getPrimitiveCategory()) {
case BOOLEAN:
@@ -296,6 +296,8 @@ public class VectorizedRowBatchCtx {
break;
case BINARY:
case STRING:
+ case CHAR:
+ case VARCHAR:
result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
case DECIMAL:
@@ -544,7 +546,9 @@ public class VectorizedRowBatchCtx {
}
break;
- case STRING: {
+ case STRING:
+ case CHAR:
+ case VARCHAR: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
String sVal = (String) value;
if (sVal == null) {
@@ -566,13 +570,17 @@ public class VectorizedRowBatchCtx {
}
}
- private void addScratchColumnsToBatch(VectorizedRowBatch vrb) {
+ private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
int origNumCols = vrb.numCols;
int newNumCols = vrb.cols.length+columnTypeMap.keySet().size();
vrb.cols = Arrays.copyOf(vrb.cols, newNumCols);
for (int i = origNumCols; i < newNumCols; i++) {
- vrb.cols[i] = allocateColumnVector(columnTypeMap.get(i),
+ String typeName = columnTypeMap.get(i);
+ if (typeName == null) {
+ throw new HiveException("No type found for column type entry " + i);
+ }
+ vrb.cols[i] = allocateColumnVector(typeName,
VectorizedRowBatch.DEFAULT_SIZE);
}
vrb.numCols = vrb.cols.length;
@@ -599,13 +607,17 @@ public class VectorizedRowBatchCtx {
private ColumnVector allocateColumnVector(String type, int defaultSize) {
if (type.equalsIgnoreCase("double")) {
return new DoubleColumnVector(defaultSize);
- } else if (type.equalsIgnoreCase("string")) {
+ } else if (VectorizationContext.isStringFamily(type)) {
return new BytesColumnVector(defaultSize);
} else if (VectorizationContext.decimalTypePattern.matcher(type).matches()){
int [] precisionScale = getScalePrecisionFromDecimalType(type);
return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]);
- } else {
+ } else if (type.equalsIgnoreCase("long") ||
+ type.equalsIgnoreCase("date") ||
+ type.equalsIgnoreCase("timestamp")) {
return new LongColumnVector(defaultSize);
+ } else {
+ throw new Error("Cannot allocate vector column for " + type);
}
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java Fri Sep 5 19:15:44 2014
@@ -420,8 +420,8 @@ public abstract class AbstractFilterStri
VectorExpressionDescriptor.Mode.FILTER)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("string"),
- VectorExpressionDescriptor.ArgumentType.getType("string"))
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java Fri Sep 5 19:15:44 2014
@@ -22,17 +22,18 @@ import org.apache.hadoop.hive.ql.exec.ve
public class CastBooleanToStringViaLongToString extends LongToStringUnaryUDF {
private static final long serialVersionUID = 1L;
- private transient byte[] temp; // space to put date string
private static final byte[][] dictionary = { {'F', 'A', 'L', 'S', 'E'}, {'T', 'R', 'U', 'E'} };
+ public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) {
+ super(inputColumn, outputColumn);
+ }
+
public CastBooleanToStringViaLongToString() {
super();
- temp = new byte[8];
}
- public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) {
- super(inputColumn, outputColumn);
- temp = new byte[8];
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
}
@Override
@@ -41,6 +42,6 @@ public class CastBooleanToStringViaLongT
/* 0 is false and 1 is true in the input vector, so a simple dictionary is used
* with two entries. 0 references FALSE and 1 references TRUE in the dictionary.
*/
- outV.setVal(i, dictionary[(int) vector[i]], 0, dictionary[(int) vector[i]].length);
+ assign(outV, i, dictionary[(int) vector[i]], dictionary[(int) vector[i]].length);
}
-}
+}
\ No newline at end of file
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java Fri Sep 5 19:15:44 2014
@@ -35,10 +35,15 @@ public class CastDateToString extends Lo
super(inputColumn, outputColumn);
}
+ // The assign method will be overridden for CHAR and VARCHAR.
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
+ }
+
@Override
protected void func(BytesColumnVector outV, long[] vector, int i) {
dt.setTime(DateWritable.daysToMillis((int) vector[i]));
byte[] temp = dt.toString().getBytes();
- outV.setVal(i, temp, 0, temp.length);
+ assign(outV, i, temp, temp.length);
}
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java?rev=1622763&r1=1622762&r2=1622763&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java Fri Sep 5 19:15:44 2014
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;