You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/09/08 06:38:26 UTC
svn commit: r1623263 [23/28] - in /hive/branches/spark: ./
accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/
ant/src/org/apache/hadoop/hive/ant/ beeline/src/java/org/apache/hive/beeline/
beeline/src/test/org/apache/hive/beeline/ bin/...
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java Mon Sep 8 04:38:17 2014
@@ -100,7 +100,7 @@ public class CompactorMR {
* @throws java.io.IOException if the job fails
*/
void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
- ValidTxnList txns, boolean isMajor) throws IOException {
+ ValidTxnList txns, boolean isMajor, Worker.StatsUpdater su) throws IOException {
JobConf job = new JobConf(conf);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
@@ -120,7 +120,7 @@ public class CompactorMR {
job.setBoolean(IS_MAJOR, isMajor);
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
- job.setInt(NUM_BUCKETS, sd.getBucketColsSize());
+ job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
setColumnTypes(job, sd.getCols());
@@ -182,6 +182,7 @@ public class CompactorMR {
LOG.debug("Setting maximume transaction to " + maxTxn);
JobClient.runJob(job).waitForCompletion();
+ su.gatherStats();
}
/**
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java Mon Sep 8 04:38:17 2014
@@ -43,6 +43,7 @@ import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
/**
* A class to initiate compactions. This will run in a separate thread.
@@ -50,7 +51,6 @@ import java.util.Set;
public class Initiator extends CompactorThread {
static final private String CLASS_NAME = Initiator.class.getName();
static final private Log LOG = LogFactory.getLog(CLASS_NAME);
- static final private int threadId = 10000;
static final private String NO_COMPACTION = "NO_AUTO_COMPACTION";
@@ -63,7 +63,7 @@ public class Initiator extends Compactor
try {
recoverFailedCompactions(false);
- int abortedThreashold = HiveConf.getIntVar(conf,
+ int abortedThreshold = HiveConf.getIntVar(conf,
HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
// Make sure we run through the loop once before checking to stop as this makes testing
@@ -77,7 +77,7 @@ public class Initiator extends Compactor
try {
ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
ValidTxnList txns = TxnHandler.createValidTxnList(txnHandler.getOpenTxns());
- Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreashold);
+ Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
LOG.debug("Found " + potentials.size() + " potential compactions, " +
"checking to see if we should compact any of them");
for (CompactionInfo ci : potentials) {
@@ -140,13 +140,13 @@ public class Initiator extends Compactor
public void init(BooleanPointer stop) throws MetaException {
super.init(stop);
checkInterval =
- HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL) * 1000;
+ conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL, TimeUnit.MILLISECONDS) ;
}
private void recoverFailedCompactions(boolean remoteOnly) throws MetaException {
if (!remoteOnly) txnHandler.revokeFromLocalWorkers(Worker.hostname());
- txnHandler.revokeTimedoutWorkers(HiveConf.getLongVar(conf,
- HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT));
+ txnHandler.revokeTimedoutWorkers(HiveConf.getTimeVar(conf,
+ HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT, TimeUnit.MILLISECONDS));
}
// Figure out if there are any currently running compactions on the same table or partition.
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java Mon Sep 8 04:38:17 2014
@@ -20,20 +20,28 @@ package org.apache.hadoop.hive.ql.txn.co
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.TxnHandler;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
+import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.security.PrivilegedExceptionAction;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
/**
* A class to do compactions. This will run in a separate thread. It will spin on the
@@ -110,7 +118,7 @@ public class Worker extends CompactorThr
continue;
}
- final boolean isMajor = (ci.type == CompactionType.MAJOR);
+ final boolean isMajor = ci.isMajorCompaction();
final ValidTxnList txns =
TxnHandler.createValidTxnList(txnHandler.getOpenTxns());
final StringBuffer jobName = new StringBuffer(name);
@@ -129,17 +137,19 @@ public class Worker extends CompactorThr
LOG.info("Starting " + ci.type.toString() + " compaction for " +
ci.getFullPartitionName());
+ final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf,
+ runJobAsSelf(runAs) ? runAs : t.getOwner());
final CompactorMR mr = new CompactorMR();
try {
if (runJobAsSelf(runAs)) {
- mr.run(conf, jobName.toString(), t, sd, txns, isMajor);
+ mr.run(conf, jobName.toString(), t, sd, txns, isMajor, su);
} else {
UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
UserGroupInformation.getLoginUser());
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
- mr.run(conf, jobName.toString(), t, sd, txns, isMajor);
+ mr.run(conf, jobName.toString(), t, sd, txns, isMajor, su);
return null;
}
});
@@ -161,11 +171,95 @@ public class Worker extends CompactorThr
public void init(BooleanPointer stop) throws MetaException {
super.init(stop);
- StringBuffer name = new StringBuffer(hostname());
+ StringBuilder name = new StringBuilder(hostname());
name.append("-");
name.append(getId());
this.name = name.toString();
setName(name.toString());
}
+ static final class StatsUpdater {
+ static final private Log LOG = LogFactory.getLog(StatsUpdater.class);
+
+ public static StatsUpdater init(CompactionInfo ci, List<String> columnListForStats,
+ HiveConf conf, String userName) {
+ return new StatsUpdater(ci, columnListForStats, conf, userName);
+ }
+ /**
+ * list columns for which to compute stats. This maybe empty which means no stats gathering
+ * is needed.
+ */
+ private final List<String> columnList;
+ private final HiveConf conf;
+ private final String userName;
+ private final CompactionInfo ci;
+
+ private StatsUpdater(CompactionInfo ci, List<String> columnListForStats,
+ HiveConf conf, String userName) {
+ this.conf = conf;
+ this.userName = userName;
+ this.ci = ci;
+ if(!ci.isMajorCompaction() || columnListForStats == null || columnListForStats.isEmpty()) {
+ columnList = Collections.emptyList();
+ return;
+ }
+ columnList = columnListForStats;
+ }
+
+ /**
+ * todo: what should this do on failure? Should it rethrow? Invalidate stats?
+ */
+ void gatherStats() throws IOException {
+ if(!ci.isMajorCompaction()) {
+ return;
+ }
+ if(columnList.isEmpty()) {
+ LOG.debug("No existing stats for " + ci.dbname + "." + ci.tableName + " found. Will not run analyze.");
+ return;//nothing to do
+ }
+ //e.g. analyze table page_view partition(dt='10/15/2014',country='US')
+ // compute statistics for columns viewtime
+ StringBuilder sb = new StringBuilder("analyze table ").append(ci.dbname).append(".").append(ci.tableName);
+ if(ci.partName != null) {
+ try {
+ sb.append(" partition(");
+ Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
+ for(Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
+ sb.append(ent.getKey()).append("='").append(ent.getValue()).append("'");
+ }
+ sb.append(")");
+ }
+ catch(MetaException ex) {
+ throw new IOException(ex);
+ }
+ }
+ sb.append(" compute statistics for columns ");
+ for(String colName : columnList) {
+ sb.append(colName).append(",");
+ }
+ sb.setLength(sb.length() - 1);//remove trailing ,
+ LOG.debug("running '" + sb.toString() + "'");
+ Driver d = new Driver(conf, userName);
+ SessionState localSession = null;
+ if(SessionState.get() == null) {
+ localSession = SessionState.start(new SessionState(conf));
+ }
+ try {
+ CommandProcessorResponse cpr = d.run(sb.toString());
+ if (cpr.getResponseCode() != 0) {
+ throw new IOException("Could not update stats for table " + ci.getFullTableName() +
+ (ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cpr);
+ }
+ }
+ catch(CommandNeedRetryException cnre) {
+ throw new IOException("Could not update stats for table " + ci.getFullTableName() +
+ (ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cnre.getMessage());
+ }
+ finally {
+ if(localSession != null) {
+ localSession.close();
+ }
+ }
+ }
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java Mon Sep 8 04:38:17 2014
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDateToBooleanViaLongToLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToBooleanViaLongToLong;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -43,6 +45,7 @@ import org.apache.hadoop.io.Text;
*
*/
@VectorizedExpressions({CastLongToBooleanViaLongToLong.class,
+ CastDateToBooleanViaLongToLong.class, CastTimestampToBooleanViaLongToLong.class,
CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class})
public class UDFToBoolean extends UDF {
private final BooleanWritable booleanWritable = new BooleanWritable();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java Mon Sep 8 04:38:17 2014
@@ -34,43 +34,38 @@ import org.apache.hadoop.io.IntWritable;
@WindowFunctionDescription
(
- description = @Description(
- name = "cume_dist",
- value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " +
- "statistical books) computes the position of a specified value relative to a set of values. " +
- "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " +
- "CUME_DIST(x) = number of values in S coming before " +
- " and including x in the specified order/ N"
- ),
- supportsWindow = false,
- pivotResult = true,
- rankingFunction = true,
- impliesOrder = true
+ description = @Description(
+ name = "cume_dist",
+ value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " +
+ "statistical books) computes the position of a specified value relative to a set of values. " +
+ "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " +
+ "CUME_DIST(x) = number of values in S coming before " +
+ " and including x in the specified order/ N"
+ ),
+ supportsWindow = false,
+ pivotResult = true,
+ rankingFunction = true,
+ impliesOrder = true
)
-public class GenericUDAFCumeDist extends GenericUDAFRank
-{
+public class GenericUDAFCumeDist extends GenericUDAFRank {
- static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName());
+ static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName());
- @Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator()
- {
- return new GenericUDAFCumeDistEvaluator();
- }
+ @Override
+ protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ return new GenericUDAFCumeDistEvaluator();
+ }
- public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator
- {
+ public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator {
@Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
- {
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
super.init(m, parameters);
return ObjectInspectorFactory
.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
}
@Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
+ public Object terminate(AggregationBuffer agg) throws HiveException {
List<IntWritable> ranks = ((RankBuffer) agg).rowNums;
int ranksSize = ranks.size();
double ranksSizeDouble = ranksSize;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java Mon Sep 8 04:38:17 2014
@@ -23,41 +23,38 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "dense_rank",
- value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " +
- "gaps in ranking sequence when there are ties. That is, if you were " +
- "ranking a competition using DENSE_RANK and had three people tie for " +
- "second place, you would say that all three were in second place and " +
- "that the next person came in third."
- ),
- supportsWindow = false,
- pivotResult = true,
- rankingFunction = true,
- impliesOrder = true
+@WindowFunctionDescription(
+ description = @Description(
+ name = "dense_rank",
+ value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " +
+ "gaps in ranking sequence when there are ties. That is, if you were " +
+ "ranking a competition using DENSE_RANK and had three people tie for " +
+ "second place, you would say that all three were in second place and " +
+ "that the next person came in third."
+ ),
+ supportsWindow = false,
+ pivotResult = true,
+ rankingFunction = true,
+ impliesOrder = true
)
-public class GenericUDAFDenseRank extends GenericUDAFRank
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName());
-
- @Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator()
- {
- return new GenericUDAFDenseRankEvaluator();
- }
-
- public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator
- {
- /*
- * Called when the value in the partition has changed. Update the currentRank
- */
- @Override
- protected void nextRank(RankBuffer rb)
- {
- rb.currentRank++;
- }
- }
+public class GenericUDAFDenseRank extends GenericUDAFRank {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName());
+
+ @Override
+ protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ return new GenericUDAFDenseRankEvaluator();
+ }
+
+ public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator {
+
+ /*
+ * Called when the value in the partition has changed. Update the currentRank
+ */
+ @Override
+ protected void nextRank(RankBuffer rb) {
+ rb.currentRank++;
+ }
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java Mon Sep 8 04:38:17 2014
@@ -41,147 +41,128 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "first_value",
- value = "_FUNC_(x)"
- ),
- supportsWindow = true,
- pivotResult = false,
- impliesOrder = true
+@WindowFunctionDescription(
+ description = @Description(
+ name = "first_value",
+ value = "_FUNC_(x)"
+ ),
+ supportsWindow = true,
+ pivotResult = false,
+ impliesOrder = true
)
-public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException
- {
- if (parameters.length > 2)
- {
- throw new UDFArgumentTypeException(2, "At most 2 arguments expected");
- }
- if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) )
- {
- throw new UDFArgumentTypeException(1, "second argument must be a boolean expression");
- }
- return createEvaluator();
- }
-
- protected GenericUDAFFirstValueEvaluator createEvaluator()
- {
- return new GenericUDAFFirstValueEvaluator();
- }
-
- static class FirstValueBuffer implements AggregationBuffer
- {
- Object val;
- boolean valSet;
- boolean firstRow;
- boolean skipNulls;
-
- FirstValueBuffer()
- {
- init();
- }
-
- void init()
- {
- val = null;
- valSet = false;
- firstRow = true;
- skipNulls = false;
- }
-
- }
-
- public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator
- {
- ObjectInspector inputOI;
- ObjectInspector outputOI;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
- {
- super.init(m, parameters);
- if (m != Mode.COMPLETE)
- {
- throw new HiveException(
- "Only COMPLETE mode supported for Rank function");
- }
- inputOI = parameters[0];
- outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, ObjectInspectorCopyOption.WRITABLE);
- return outputOI;
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException
- {
- return new FirstValueBuffer();
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException
- {
- ((FirstValueBuffer) agg).init();
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException
- {
- FirstValueBuffer fb = (FirstValueBuffer) agg;
-
- if (fb.firstRow )
- {
- fb.firstRow = false;
- if ( parameters.length == 2 )
- {
- fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(
- parameters[1],
- PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
- }
- }
-
- if ( !fb.valSet )
- {
- fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, ObjectInspectorCopyOption.WRITABLE);
- if ( !fb.skipNulls || fb.val != null )
- {
- fb.valSet = true;
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg) throws HiveException
- {
- throw new HiveException("terminatePartial not supported");
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial) throws HiveException
- {
- throw new HiveException("merge not supported");
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- return ((FirstValueBuffer) agg).val;
- }
-
+public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length > 2) {
+ throw new UDFArgumentTypeException(2, "At most 2 arguments expected");
+ }
+ if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) {
+ throw new UDFArgumentTypeException(1, "second argument must be a boolean expression");
+ }
+ return createEvaluator();
+ }
+
+ protected GenericUDAFFirstValueEvaluator createEvaluator() {
+ return new GenericUDAFFirstValueEvaluator();
+ }
+
+ static class FirstValueBuffer implements AggregationBuffer {
+
+ Object val;
+ boolean valSet;
+ boolean firstRow;
+ boolean skipNulls;
+
+ FirstValueBuffer() {
+ init();
+ }
+
+ void init() {
+ val = null;
+ valSet = false;
+ firstRow = true;
+ skipNulls = false;
+ }
+
+ }
+
+ public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator {
+
+ ObjectInspector inputOI;
+ ObjectInspector outputOI;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ if (m != Mode.COMPLETE) {
+ throw new HiveException("Only COMPLETE mode supported for Rank function");
+ }
+ inputOI = parameters[0];
+ outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
+ ObjectInspectorCopyOption.WRITABLE);
+ return outputOI;
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new FirstValueBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((FirstValueBuffer) agg).init();
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ FirstValueBuffer fb = (FirstValueBuffer) agg;
+
+ if (fb.firstRow) {
+ fb.firstRow = false;
+ if (parameters.length == 2) {
+ fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1],
+ PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
+ }
+ }
+
+ if (!fb.valSet) {
+ fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI,
+ ObjectInspectorCopyOption.WRITABLE);
+ if (!fb.skipNulls || fb.val != null) {
+ fb.valSet = true;
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ throw new HiveException("terminatePartial not supported");
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ throw new HiveException("merge not supported");
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ return ((FirstValueBuffer) agg).val;
+ }
+
@Override
public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
BoundaryDef start = wFrmDef.getStart();
BoundaryDef end = wFrmDef.getEnd();
- return new FirstValStreamingFixedWindow(this, start.getAmt(),
- end.getAmt());
+ return new FirstValStreamingFixedWindow(this, start.getAmt(), end.getAmt());
}
- }
-
+ }
+
static class ValIndexPair {
+
Object val;
int idx;
@@ -191,16 +172,15 @@ public class GenericUDAFFirstValue exten
}
}
- static class FirstValStreamingFixedWindow extends
- GenericUDAFStreamingEvaluator<Object> {
+ static class FirstValStreamingFixedWindow extends GenericUDAFStreamingEvaluator<Object> {
class State extends GenericUDAFStreamingEvaluator<Object>.StreamingState {
+
private final Deque<ValIndexPair> valueChain;
public State(int numPreceding, int numFollowing, AggregationBuffer buf) {
super(numPreceding, numFollowing, buf);
- valueChain = new ArrayDeque<ValIndexPair>(numPreceding + numFollowing
- + 1);
+ valueChain = new ArrayDeque<ValIndexPair>(numPreceding + numFollowing + 1);
}
@Override
@@ -222,8 +202,8 @@ public class GenericUDAFFirstValue exten
*/
int wdwSz = numPreceding + numFollowing + 1;
- return underlying + (underlying * wdwSz) + (underlying * wdwSz)
- + (3 * JavaDataModel.PRIMITIVES1);
+ return underlying + (underlying * wdwSz) + (underlying * wdwSz) + (3
+ * JavaDataModel.PRIMITIVES1);
}
protected void reset() {
@@ -232,8 +212,8 @@ public class GenericUDAFFirstValue exten
}
}
- public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval,
- int numPreceding, int numFollowing) {
+ public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding,
+ int numFollowing) {
super(wrappedEval, numPreceding, numFollowing);
}
@@ -253,8 +233,7 @@ public class GenericUDAFFirstValue exten
}
@Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
State s = (State) agg;
FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf;
@@ -266,15 +245,14 @@ public class GenericUDAFFirstValue exten
wrappedEval.iterate(fb, parameters);
}
- Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0],
- inputOI(), ObjectInspectorCopyOption.WRITABLE);
+ Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(),
+ ObjectInspectorCopyOption.WRITABLE);
/*
* add row to chain. except in case of UNB preceding: - only 1 firstVal
* needs to be tracked.
*/
- if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT
- || s.valueChain.isEmpty()) {
+ if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT || s.valueChain.isEmpty()) {
/*
* add value to chain if it is not null or if skipNulls is false.
*/
@@ -309,8 +287,7 @@ public class GenericUDAFFirstValue exten
public Object terminate(AggregationBuffer agg) throws HiveException {
State s = (State) agg;
FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf;
- ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null
- : s.valueChain.getFirst();
+ ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null : s.valueChain.getFirst();
for (int i = 0; i < s.numFollowing; i++) {
s.results.add(r == null ? null : r.val);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java Mon Sep 8 04:38:17 2014
@@ -37,131 +37,107 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true)
-public class GenericUDAFLastValue extends AbstractGenericUDAFResolver
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException
- {
- if (parameters.length > 2)
- {
- throw new UDFArgumentTypeException(2, "At most 2 arguments expected");
- }
- if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) )
- {
- throw new UDFArgumentTypeException(1, "second argument must be a boolean expression");
- }
- return createEvaluator();
- }
-
- protected GenericUDAFLastValueEvaluator createEvaluator()
- {
- return new GenericUDAFLastValueEvaluator();
- }
-
- static class LastValueBuffer implements AggregationBuffer
- {
- Object val;
- boolean firstRow;
- boolean skipNulls;
-
- LastValueBuffer()
- {
- init();
- }
-
- void init()
- {
- val = null;
- firstRow = true;
- skipNulls = false;
- }
-
- }
-
- public static class GenericUDAFLastValueEvaluator extends
- GenericUDAFEvaluator
- {
- ObjectInspector inputOI;
- ObjectInspector outputOI;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException
- {
- super.init(m, parameters);
- if (m != Mode.COMPLETE)
- {
- throw new HiveException(
- "Only COMPLETE mode supported for Rank function");
- }
- inputOI = parameters[0];
- outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
- ObjectInspectorCopyOption.WRITABLE);
- return outputOI;
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException
- {
- return new LastValueBuffer();
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException
- {
- ((LastValueBuffer) agg).init();
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException
- {
- LastValueBuffer lb = (LastValueBuffer) agg;
- if (lb.firstRow )
- {
- lb.firstRow = false;
- if ( parameters.length == 2 )
- {
- lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(
- parameters[1],
- PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
- }
- }
-
- Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0],
- inputOI, ObjectInspectorCopyOption.WRITABLE);
+@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"),
+ supportsWindow = true, pivotResult = false, impliesOrder = true)
+public class GenericUDAFLastValue extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length > 2) {
+ throw new UDFArgumentTypeException(2, "At most 2 arguments expected");
+ }
+ if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) {
+ throw new UDFArgumentTypeException(1, "second argument must be a boolean expression");
+ }
+ return createEvaluator();
+ }
+
+ protected GenericUDAFLastValueEvaluator createEvaluator() {
+ return new GenericUDAFLastValueEvaluator();
+ }
+
+ static class LastValueBuffer implements AggregationBuffer {
+
+ Object val;
+ boolean firstRow;
+ boolean skipNulls;
+
+ LastValueBuffer() {
+ init();
+ }
+
+ void init() {
+ val = null;
+ firstRow = true;
+ skipNulls = false;
+ }
+
+ }
+
+ public static class GenericUDAFLastValueEvaluator extends GenericUDAFEvaluator {
+
+ ObjectInspector inputOI;
+ ObjectInspector outputOI;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ if (m != Mode.COMPLETE) {
+ throw new HiveException("Only COMPLETE mode supported for Rank function");
+ }
+ inputOI = parameters[0];
+ outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
+ ObjectInspectorCopyOption.WRITABLE);
+ return outputOI;
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new LastValueBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((LastValueBuffer) agg).init();
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ LastValueBuffer lb = (LastValueBuffer) agg;
+ if (lb.firstRow) {
+ lb.firstRow = false;
+ if (parameters.length == 2) {
+ lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1],
+ PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
+ }
+ }
+
+ Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI,
+ ObjectInspectorCopyOption.WRITABLE);
if (!lb.skipNulls || o != null) {
lb.val = o;
}
- }
+ }
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException
- {
- throw new HiveException("terminatePartial not supported");
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException
- {
- throw new HiveException("merge not supported");
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- LastValueBuffer lb = (LastValueBuffer) agg;
- return lb.val;
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ throw new HiveException("terminatePartial not supported");
+ }
- }
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ throw new HiveException("merge not supported");
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ LastValueBuffer lb = (LastValueBuffer) agg;
+ return lb.val;
+
+ }
@Override
public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
@@ -169,12 +145,12 @@ public class GenericUDAFLastValue extend
BoundaryDef end = wFrmDef.getEnd();
return new LastValStreamingFixedWindow(this, start.getAmt(), end.getAmt());
}
- }
+ }
- static class LastValStreamingFixedWindow extends
- GenericUDAFStreamingEvaluator<Object> {
+ static class LastValStreamingFixedWindow extends GenericUDAFStreamingEvaluator<Object> {
class State extends GenericUDAFStreamingEvaluator<Object>.StreamingState {
+
private Object lastValue;
private int lastIdx;
@@ -203,8 +179,8 @@ public class GenericUDAFLastValue extend
}
}
- public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval,
- int numPreceding, int numFollowing) {
+ public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding,
+ int numFollowing) {
super(wrappedEval, numPreceding, numFollowing);
}
@@ -224,8 +200,7 @@ public class GenericUDAFLastValue extend
}
@Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
State s = (State) agg;
LastValueBuffer lb = (LastValueBuffer) s.wrappedBuf;
@@ -237,8 +212,8 @@ public class GenericUDAFLastValue extend
wrappedEval.iterate(lb, parameters);
}
- Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0],
- inputOI(), ObjectInspectorCopyOption.WRITABLE);
+ Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(),
+ ObjectInspectorCopyOption.WRITABLE);
if (!lb.skipNulls || o != null) {
s.lastValue = o;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java Mon Sep 8 04:38:17 2014
@@ -38,144 +38,129 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "rank",
- value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other " +
- "common summary statistics. This function divides an ordered partition into a specified " +
- "number of groups called buckets and assigns a bucket number to each row in the partition."
- ),
- supportsWindow = false,
- pivotResult = true
+@WindowFunctionDescription(
+ description = @Description(
+ name = "rank",
+ value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other "
+ +"common summary statistics. This function divides an ordered partition into a "
+ + "specified number of groups called buckets and assigns a bucket number to each row "
+ + "in the partition."
+ ),
+ supportsWindow = false,
+ pivotResult = true
)
-public class GenericUDAFNTile extends AbstractGenericUDAFResolver
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFNTile.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException
- {
- if (parameters.length != 1)
- {
- throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
- }
- ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
-
- boolean c = ObjectInspectorUtils.compareTypes(oi, PrimitiveObjectInspectorFactory.writableIntObjectInspector);
- if (!c)
- {
- throw new UDFArgumentTypeException(0, "Number of tiles must be an int expression");
- }
-
- return new GenericUDAFNTileEvaluator();
- }
-
- static class NTileBuffer implements AggregationBuffer
- {
- Integer numBuckets;
- int numRows;
-
- void init()
- {
- numBuckets = null;
- numRows = 0;
- }
-
- NTileBuffer()
- {
- init();
- }
- }
-
- public static class GenericUDAFNTileEvaluator extends GenericUDAFEvaluator
- {
- private transient PrimitiveObjectInspector inputOI;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
- {
- assert (parameters.length == 1);
- super.init(m, parameters);
- if (m != Mode.COMPLETE)
- {
- throw new HiveException(
- "Only COMPLETE mode supported for NTile function");
- }
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException
- {
- return new NTileBuffer();
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException
- {
- ((NTileBuffer) agg).init();
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException
- {
- NTileBuffer rb = (NTileBuffer) agg;
- if ( rb.numBuckets == null)
- {
- rb.numBuckets = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI);
- }
- rb.numRows++;
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg) throws HiveException
- {
- throw new HiveException("terminatePartial not supported");
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial) throws HiveException
- {
- throw new HiveException("merge not supported");
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- NTileBuffer rb = (NTileBuffer) agg;
- ArrayList<IntWritable> res = new ArrayList<IntWritable>(rb.numRows);
-
- /*
- * if there is a remainder from numRows/numBuckets; then distribute increase the size of the first 'rem' buckets by 1.
- */
-
- int bucketsz = rb.numRows / rb.numBuckets;
- int rem = rb.numRows % rb.numBuckets;
- int start = 0;
- int bucket = 1;
- while ( start < rb.numRows)
- {
- int end = start + bucketsz;
- if (rem > 0)
- {
- end++; rem--;
- }
- end = Math.min(rb.numRows, end);
- for(int i = start; i < end; i++)
- {
- res.add(new IntWritable(bucket));
- }
- start = end;
- bucket++;
- }
+public class GenericUDAFNTile extends AbstractGenericUDAFResolver {
- return res;
- }
+ static final Log LOG = LogFactory.getLog(GenericUDAFNTile.class.getName());
- }
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1,
+ "Exactly one argument is expected.");
+ }
+ ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
+
+ boolean c = ObjectInspectorUtils.compareTypes(oi,
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+ if (!c) {
+ throw new UDFArgumentTypeException(0, "Number of tiles must be an int expression");
+ }
+
+ return new GenericUDAFNTileEvaluator();
+ }
+
+ static class NTileBuffer implements AggregationBuffer {
+
+ Integer numBuckets;
+ int numRows;
+
+ void init() {
+ numBuckets = null;
+ numRows = 0;
+ }
+
+ NTileBuffer() {
+ init();
+ }
+ }
+
+ public static class GenericUDAFNTileEvaluator extends GenericUDAFEvaluator {
+
+ private transient PrimitiveObjectInspector inputOI;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+ if (m != Mode.COMPLETE) {
+ throw new HiveException("Only COMPLETE mode supported for NTile function");
+ }
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ return ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new NTileBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((NTileBuffer) agg).init();
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ NTileBuffer rb = (NTileBuffer) agg;
+ if (rb.numBuckets == null) {
+ rb.numBuckets = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI);
+ }
+ rb.numRows++;
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ throw new HiveException("terminatePartial not supported");
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ throw new HiveException("merge not supported");
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ NTileBuffer rb = (NTileBuffer) agg;
+ ArrayList<IntWritable> res = new ArrayList<IntWritable>(rb.numRows);
+
+ /*
+ * if there is a remainder from numRows/numBuckets; then distribute increase the size of the first 'rem' buckets by 1.
+ */
+
+ int bucketsz = rb.numRows / rb.numBuckets;
+ int rem = rb.numRows % rb.numBuckets;
+ int start = 0;
+ int bucket = 1;
+ while (start < rb.numRows) {
+ int end = start + bucketsz;
+ if (rem > 0) {
+ end++;
+ rem--;
+ }
+ end = Math.min(rb.numRows, end);
+ for (int i = start; i < end; i++) {
+ res.add(new IntWritable(bucket));
+ }
+ start = end;
+ bucket++;
+ }
+ return res;
+ }
+
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java Mon Sep 8 04:38:17 2014
@@ -31,56 +31,52 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "percent_rank",
- value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " +
- "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " +
- "(rank of row in its partition - 1) / (number of rows in the partition - 1)"
- ),
- supportsWindow = false,
- pivotResult = true,
- rankingFunction = true,
- impliesOrder = true
+@WindowFunctionDescription(
+ description = @Description(
+ name = "percent_rank",
+ value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " +
+ "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " +
+ "(rank of row in its partition - 1) / (number of rows in the partition - 1)"
+ ),
+ supportsWindow = false,
+ pivotResult = true,
+ rankingFunction = true,
+ impliesOrder = true
)
-public class GenericUDAFPercentRank extends GenericUDAFRank
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFPercentRank.class.getName());
-
- @Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator()
- {
- return new GenericUDAFPercentRankEvaluator();
- }
-
- public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator
- {
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
- {
- super.init(m, parameters);
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- ArrayList<IntWritable> ranks = ((RankBuffer) agg).rowNums;
- double sz = ranks.size();
- if ( sz > 1 ) {
+public class GenericUDAFPercentRank extends GenericUDAFRank {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFPercentRank.class.getName());
+
+ @Override
+ protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ return new GenericUDAFPercentRankEvaluator();
+ }
+
+ public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator {
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ return ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ ArrayList<IntWritable> ranks = ((RankBuffer) agg).rowNums;
+ double sz = ranks.size();
+ if (sz > 1) {
sz = sz - 1;
}
- ArrayList<DoubleWritable> pranks = new ArrayList<DoubleWritable>(ranks.size());
+ ArrayList<DoubleWritable> pranks = new ArrayList<DoubleWritable>(ranks.size());
+
+ for (IntWritable i : ranks) {
+ double pr = ((double) i.get() - 1) / sz;
+ pranks.add(new DoubleWritable(pr));
+ }
- for(IntWritable i : ranks)
- {
- double pr = ((double)i.get() - 1)/sz;
- pranks.add(new DoubleWritable(pr));
- }
-
- return pranks;
- }
- }
+ return pranks;
+ }
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java Mon Sep 8 04:38:17 2014
@@ -38,170 +38,150 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "rank",
- value = "_FUNC_(x)"
- ),
- supportsWindow = false,
- pivotResult = true,
- rankingFunction = true,
- impliesOrder = true
-)
-public class GenericUDAFRank extends AbstractGenericUDAFResolver
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFRank.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException
- {
- if (parameters.length < 1)
- {
- throw new UDFArgumentTypeException(parameters.length - 1, "One or more arguments are expected.");
- }
- for(int i=0; i<parameters.length; i++)
- {
- ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[i]);
- if (!ObjectInspectorUtils.compareSupported(oi))
- {
- throw new UDFArgumentTypeException(i,
- "Cannot support comparison of map<> type or complex type containing map<>.");
- }
- }
- return createEvaluator();
- }
-
- protected GenericUDAFAbstractRankEvaluator createEvaluator()
- {
- return new GenericUDAFRankEvaluator();
- }
-
- static class RankBuffer implements AggregationBuffer
- {
- ArrayList<IntWritable> rowNums;
- int currentRowNum;
- Object[] currVal;
- int currentRank;
- int numParams;
- boolean supportsStreaming;
-
- RankBuffer(int numParams, boolean supportsStreaming)
- {
- this.numParams = numParams;
- this.supportsStreaming = supportsStreaming;
- init();
- }
-
- void init()
- {
- rowNums = new ArrayList<IntWritable>();
- currentRowNum = 0;
- currentRank = 0;
- currVal = new Object[numParams];
- if ( supportsStreaming ) {
- /* initialize rowNums to have 1 row */
- rowNums.add(null);
- }
- }
-
- void incrRowNum() { currentRowNum++; }
-
- void addRank()
- {
- if ( supportsStreaming ) {
- rowNums.set(0, new IntWritable(currentRank));
- } else {
- rowNums.add(new IntWritable(currentRank));
- }
- }
- }
-
- public static abstract class GenericUDAFAbstractRankEvaluator extends GenericUDAFEvaluator
- {
- ObjectInspector[] inputOI;
- ObjectInspector[] outputOI;
- boolean isStreamingMode = false;
-
- protected boolean isStreaming() {
- return isStreamingMode;
- }
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
- {
- super.init(m, parameters);
- if (m != Mode.COMPLETE)
- {
- throw new HiveException(
- "Only COMPLETE mode supported for Rank function");
- }
- inputOI = parameters;
- outputOI = new ObjectInspector[inputOI.length];
- for(int i=0; i < inputOI.length; i++)
- {
- outputOI[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOI[i], ObjectInspectorCopyOption.JAVA);
- }
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException
- {
- return new RankBuffer(inputOI.length, isStreamingMode);
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException
- {
- ((RankBuffer) agg).init();
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException
- {
- RankBuffer rb = (RankBuffer) agg;
- int c = GenericUDAFRank.compare(rb.currVal, outputOI, parameters, inputOI);
- rb.incrRowNum();
- if ( rb.currentRowNum == 1 || c != 0 )
- {
- nextRank(rb);
- rb.currVal = GenericUDAFRank.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA);
- }
- rb.addRank();
- }
-
- /*
- * Called when the value in the partition has changed. Update the currentRank
- */
- protected void nextRank(RankBuffer rb)
- {
- rb.currentRank = rb.currentRowNum;
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg) throws HiveException
- {
- throw new HiveException("terminatePartial not supported");
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial) throws HiveException
- {
- throw new HiveException("merge not supported");
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- return ((RankBuffer) agg).rowNums;
- }
-
- }
-
- public static class GenericUDAFRankEvaluator extends
- GenericUDAFAbstractRankEvaluator implements
- ISupportStreamingModeForWindowing {
+@WindowFunctionDescription(
+ description = @Description(
+ name = "rank",
+ value = "_FUNC_(x)"),
+ supportsWindow = false,
+ pivotResult = true,
+ rankingFunction = true,
+ impliesOrder = true)
+public class GenericUDAFRank extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFRank.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length < 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1,
+ "One or more arguments are expected.");
+ }
+ for (int i = 0; i < parameters.length; i++) {
+ ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[i]);
+ if (!ObjectInspectorUtils.compareSupported(oi)) {
+ throw new UDFArgumentTypeException(i,
+ "Cannot support comparison of map<> type or complex type containing map<>.");
+ }
+ }
+ return createEvaluator();
+ }
+
+ protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ return new GenericUDAFRankEvaluator();
+ }
+
+ static class RankBuffer implements AggregationBuffer {
+
+ ArrayList<IntWritable> rowNums;
+ int currentRowNum;
+ Object[] currVal;
+ int currentRank;
+ int numParams;
+ boolean supportsStreaming;
+
+ RankBuffer(int numParams, boolean supportsStreaming) {
+ this.numParams = numParams;
+ this.supportsStreaming = supportsStreaming;
+ init();
+ }
+
+ void init() {
+ rowNums = new ArrayList<IntWritable>();
+ currentRowNum = 0;
+ currentRank = 0;
+ currVal = new Object[numParams];
+ if (supportsStreaming) {
+ /* initialize rowNums to have 1 row */
+ rowNums.add(null);
+ }
+ }
+
+ void incrRowNum() { currentRowNum++; }
+
+ void addRank() {
+ if (supportsStreaming) {
+ rowNums.set(0, new IntWritable(currentRank));
+ } else {
+ rowNums.add(new IntWritable(currentRank));
+ }
+ }
+ }
+
+ public static abstract class GenericUDAFAbstractRankEvaluator extends GenericUDAFEvaluator {
+
+ ObjectInspector[] inputOI;
+ ObjectInspector[] outputOI;
+ boolean isStreamingMode = false;
+
+ protected boolean isStreaming() {
+ return isStreamingMode;
+ }
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ if (m != Mode.COMPLETE) {
+ throw new HiveException("Only COMPLETE mode supported for Rank function");
+ }
+ inputOI = parameters;
+ outputOI = new ObjectInspector[inputOI.length];
+ for (int i = 0; i < inputOI.length; i++) {
+ outputOI[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOI[i],
+ ObjectInspectorCopyOption.JAVA);
+ }
+ return ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new RankBuffer(inputOI.length, isStreamingMode);
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((RankBuffer) agg).init();
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ RankBuffer rb = (RankBuffer) agg;
+ int c = GenericUDAFRank.compare(rb.currVal, outputOI, parameters, inputOI);
+ rb.incrRowNum();
+ if (rb.currentRowNum == 1 || c != 0) {
+ nextRank(rb);
+ rb.currVal =
+ GenericUDAFRank.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA);
+ }
+ rb.addRank();
+ }
+
+ /*
+ * Called when the value in the partition has changed. Update the currentRank
+ */
+ protected void nextRank(RankBuffer rb) {
+ rb.currentRank = rb.currentRowNum;
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ throw new HiveException("terminatePartial not supported");
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ throw new HiveException("merge not supported");
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ return ((RankBuffer) agg).rowNums;
+ }
+
+ }
+
+ public static class GenericUDAFRankEvaluator extends GenericUDAFAbstractRankEvaluator
+ implements ISupportStreamingModeForWindowing {
@Override
public Object getNextResult(AggregationBuffer agg) throws HiveException {
@@ -215,18 +195,15 @@ public class GenericUDAFRank extends Abs
}
@Override
- public int getRowsRemainingAfterTerminate()
- throws HiveException {
+ public int getRowsRemainingAfterTerminate() throws HiveException {
return 0;
}
}
public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2,
- ObjectInspector[] oi2)
- {
+ ObjectInspector[] oi2) {
int c = 0;
- for (int i = 0; i < oi1.length; i++)
- {
+ for (int i = 0; i < oi1.length; i++) {
c = ObjectInspectorUtils.compare(o1[i], oi1[i], o2[i], oi2[i]);
if (c != 0) {
return c;
@@ -235,15 +212,11 @@ public class GenericUDAFRank extends Abs
return c;
}
- public static Object[] copyToStandardObject(Object[] o,
- ObjectInspector[] oi,
- ObjectInspectorCopyOption objectInspectorOption)
- {
+ public static Object[] copyToStandardObject(Object[] o, ObjectInspector[] oi,
+ ObjectInspectorCopyOption objectInspectorOption) {
Object[] out = new Object[o.length];
- for (int i = 0; i < oi.length; i++)
- {
- out[i] = ObjectInspectorUtils.copyToStandardObject(o[i], oi[i],
- objectInspectorOption);
+ for (int i = 0; i < oi.length; i++) {
+ out[i] = ObjectInspectorUtils.copyToStandardObject(o[i], oi[i], objectInspectorOption);
}
return out;
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java Mon Sep 8 04:38:17 2014
@@ -34,110 +34,89 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.IntWritable;
-@WindowFunctionDescription
-(
- description = @Description(
- name = "row_number",
- value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting from 1, as defined by ORDER BY) to each row within the partition."
- ),
- supportsWindow = false,
- pivotResult = true
+@WindowFunctionDescription(
+ description = @Description(
+ name = "row_number",
+ value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting "
+ + "from 1, as defined by ORDER BY) to each row within the partition."
+ ),
+ supportsWindow = false,
+ pivotResult = true
)
-public class GenericUDAFRowNumber extends AbstractGenericUDAFResolver
-{
- static final Log LOG = LogFactory.getLog(GenericUDAFRowNumber.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException
- {
- if (parameters.length != 0)
- {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "No argument is expected.");
- }
- return new GenericUDAFRowNumberEvaluator();
- }
-
- static class RowNumberBuffer implements AggregationBuffer
- {
- ArrayList<IntWritable> rowNums;
- int nextRow;
-
- void init()
- {
- rowNums = new ArrayList<IntWritable>();
- }
-
- RowNumberBuffer()
- {
- init();
- nextRow = 1;
- }
-
- void incr()
- {
- rowNums.add(new IntWritable(nextRow++));
- }
- }
-
- public static class GenericUDAFRowNumberEvaluator extends
- GenericUDAFEvaluator
- {
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException
- {
- super.init(m, parameters);
- if (m != Mode.COMPLETE)
- {
- throw new HiveException("Only COMPLETE mode supported for row_number function");
- }
-
- return ObjectInspectorFactory.getStandardListObjectInspector(
- PrimitiveObjectInspectorFactory.writableIntObjectInspector);
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException
- {
- return new RowNumberBuffer();
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException
- {
- ((RowNumberBuffer) agg).init();
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException
- {
- ((RowNumberBuffer) agg).incr();
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException
- {
- throw new HiveException("terminatePartial not supported");
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException
- {
- throw new HiveException("merge not supported");
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException
- {
- return ((RowNumberBuffer) agg).rowNums;
- }
+public class GenericUDAFRowNumber extends AbstractGenericUDAFResolver {
- }
+ static final Log LOG = LogFactory.getLog(GenericUDAFRowNumber.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 0) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "No argument is expected.");
+ }
+ return new GenericUDAFRowNumberEvaluator();
+ }
+
+ static class RowNumberBuffer implements AggregationBuffer {
+
+ ArrayList<IntWritable> rowNums;
+ int nextRow;
+
+ void init() {
+ rowNums = new ArrayList<IntWritable>();
+ }
+
+ RowNumberBuffer() {
+ init();
+ nextRow = 1;
+ }
+
+ void incr() {
+ rowNums.add(new IntWritable(nextRow++));
+ }
+ }
+
+ public static class GenericUDAFRowNumberEvaluator extends GenericUDAFEvaluator {
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ if (m != Mode.COMPLETE) {
+ throw new HiveException("Only COMPLETE mode supported for row_number function");
+ }
+
+ return ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new RowNumberBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((RowNumberBuffer) agg).init();
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ ((RowNumberBuffer) agg).incr();
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ throw new HiveException("terminatePartial not supported");
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ throw new HiveException("merge not supported");
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ return ((RowNumberBuffer) agg).rowNums;
+ }
+
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java Mon Sep 8 04:38:17 2014
@@ -46,7 +46,7 @@ public abstract class GenericUDFBasePad
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 3) {
throw new UDFArgumentException(udfName + " requires three arguments. Found :"
- + arguments.length);
+ + arguments.length);
}
converter1 = checkTextArguments(arguments, 0);
converter2 = checkIntArguments(arguments, 1);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java Mon Sep 8 04:38:17 2014
@@ -40,14 +40,14 @@ public abstract class GenericUDFBaseTrim
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 1) {
throw new UDFArgumentException(udfName + " requires one value argument. Found :"
- + arguments.length);
+ + arguments.length);
}
PrimitiveObjectInspector argumentOI;
if(arguments[0] instanceof PrimitiveObjectInspector) {
argumentOI = (PrimitiveObjectInspector) arguments[0];
} else {
throw new UDFArgumentException(udfName + " takes only primitive types. found "
- + arguments[0].getTypeName());
+ + arguments[0].getTypeName());
}
switch (argumentOI.getPrimitiveCategory()) {
case STRING:
@@ -56,7 +56,7 @@ public abstract class GenericUDFBaseTrim
break;
default:
throw new UDFArgumentException(udfName + " takes only STRING/CHAR/VARCHAR types. Found "
- + argumentOI.getPrimitiveCategory());
+ + argumentOI.getPrimitiveCategory());
}
converter = new TextConverter(argumentOI);
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java Mon Sep 8 04:38:17 2014
@@ -51,7 +51,7 @@ public class GenericUDFBetween extends G
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
boolean invert = (Boolean) ((PrimitiveObjectInspector) argumentOIs[0])
- .getPrimitiveJavaObject(arguments[0].get());
+ .getPrimitiveJavaObject(arguments[0].get());
BooleanWritable left = ((BooleanWritable)egt.evaluate(new DeferredObject[] {arguments[1], arguments[2]}));
if (left == null) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java Mon Sep 8 04:38:17 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.common.typ
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -128,7 +129,7 @@ public class GenericUDFBridge extends Ge
public Class<? extends UDF> getUdfClass() {
try {
- return (Class<? extends UDF>) Class.forName(udfClassName, true, JavaUtils.getClassLoader());
+ return (Class<? extends UDF>) Class.forName(udfClassName, true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
@@ -138,7 +139,7 @@ public class GenericUDFBridge extends Ge
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
try {
- udf = (UDF) Class.forName(udfClassName, true, JavaUtils.getClassLoader()).newInstance();
+ udf = (UDF) Class.forName(udfClassName, true, Utilities.getSessionSpecifiedClassLoader()).newInstance();
} catch (Exception e) {
throw new UDFArgumentException(
"Unable to instantiate UDF implementation class " + udfClassName + ": " + e);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java Mon Sep 8 04:38:17 2014
@@ -23,9 +23,13 @@ import org.apache.hadoop.hive.common.typ
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -49,8 +53,11 @@ extended = "Returns NULL if any argument
+ "Example:\n"
+ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
+ " 'abcdef'")
-@VectorizedExpressions({StringConcatColCol.class, StringConcatColScalar.class,
- StringConcatScalarCol.class})
+@VectorizedExpressions({StringGroupConcatColCol.class,
+ StringGroupColConcatStringScalar.class,
+ StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class,
+ StringScalarConcatStringGroupCol.class,
+ CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class})
public class GenericUDFConcat extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
private transient StringConverter[] stringConverters;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java Mon Sep 8 04:38:17 2014
@@ -68,7 +68,7 @@ public class GenericUDFDateDiff extends
private transient PrimitiveCategory inputType1;
private transient PrimitiveCategory inputType2;
private IntWritable result = new IntWritable();
-
+
public GenericUDFDateDiff() {
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
}
@@ -89,8 +89,8 @@ public class GenericUDFDateDiff extends
@Override
public IntWritable evaluate(DeferredObject[] arguments) throws HiveException {
- output = evaluate(convertToDate(inputType1, inputConverter1, arguments[0]),
- convertToDate(inputType2, inputConverter2, arguments[1]));
+ output = evaluate(convertToDate(inputType1, inputConverter1, arguments[0]),
+ convertToDate(inputType2, inputConverter2, arguments[1]));
return output;
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java Mon Sep 8 04:38:17 2014
@@ -44,8 +44,8 @@ import org.apache.hadoop.io.Text;
@Description(name = "decode",
value = "_FUNC_(bin, str) - Decode the first argument using the second argument character set",
extended = "Possible options for the character set are 'US_ASCII', 'ISO-8859-1',\n" +
- "'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. If either argument\n" +
- "is null, the result will also be null")
+ "'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. If either argument\n" +
+ "is null, the result will also be null")
public class GenericUDFDecode extends GenericUDF {
private transient CharsetDecoder decoder = null;
private transient BinaryObjectInspector bytesOI = null;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java Mon Sep 8 04:38:17 2014
@@ -41,10 +41,20 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarVarCharScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringScalar;
+
+
/**
* IF(expr1,expr2,expr3) <br>
@@ -60,8 +70,14 @@ import org.apache.hadoop.hive.ql.exec.ve
IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class,
IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class,
IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class,
- IfExprStringColumnStringColumn.class, IfExprStringColumnStringScalar.class,
- IfExprStringScalarStringColumn.class, IfExprStringScalarStringScalar.class
+ IfExprStringGroupColumnStringGroupColumn.class,
+ IfExprStringGroupColumnStringScalar.class,
+ IfExprStringGroupColumnCharScalar.class, IfExprStringGroupColumnVarCharScalar.class,
+ IfExprStringScalarStringGroupColumn.class,
+ IfExprCharScalarStringGroupColumn.class, IfExprVarCharScalarStringGroupColumn.class,
+ IfExprStringScalarStringScalar.class,
+ IfExprStringScalarCharScalar.class, IfExprStringScalarVarCharScalar.class,
+ IfExprCharScalarStringScalar.class, IfExprVarCharScalarStringScalar.class,
})
public class GenericUDFIf extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java Mon Sep 8 04:38:17 2014
@@ -46,7 +46,7 @@ public class GenericUDFLpad extends Gene
// Copy the padding
for (int i = 0; i < pos; i += pad.getLength()) {
for (int j = 0; j < pad.getLength() && j < pos - i; j++) {
- data[i + j] = padTxt[j];
+ data[i + j] = padTxt[j];
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java Mon Sep 8 04:38:17 2014
@@ -34,9 +34,14 @@ import org.apache.hadoop.hive.serde2.obj
DoubleColEqualLongScalar.class, DoubleColEqualDoubleScalar.class,
LongScalarEqualLongColumn.class, LongScalarEqualDoubleColumn.class,
DoubleScalarEqualLongColumn.class, DoubleScalarEqualDoubleColumn.class,
- StringColEqualStringColumn.class, StringColEqualStringScalar.class,
- StringScalarEqualStringColumn.class, FilterStringColEqualStringColumn.class,
- FilterStringColEqualStringScalar.class, FilterStringScalarEqualStringColumn.class,
+ StringGroupColEqualStringGroupColumn.class, FilterStringGroupColEqualStringGroupColumn.class,
+ StringGroupColEqualStringScalar.class,
+ StringGroupColEqualVarCharScalar.class, StringGroupColEqualCharScalar.class,
+ StringScalarEqualStringGroupColumn.class,
+ VarCharScalarEqualStringGroupColumn.class, CharScalarEqualStringGroupColumn.class,
+ FilterStringGroupColEqualStringScalar.class, FilterStringScalarEqualStringGroupColumn.class,
+ FilterStringGroupColEqualVarCharScalar.class, FilterVarCharScalarEqualStringGroupColumn.class,
+ FilterStringGroupColEqualCharScalar.class, FilterCharScalarEqualStringGroupColumn.class,
FilterLongColEqualLongColumn.class, FilterLongColEqualDoubleColumn.class,
FilterDoubleColEqualLongColumn.class, FilterDoubleColEqualDoubleColumn.class,
FilterLongColEqualLongScalar.class, FilterLongColEqualDoubleScalar.class,
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java?rev=1623263&r1=1623262&r2=1623263&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java Mon Sep 8 04:38:17 2014
@@ -35,9 +35,14 @@ import org.apache.hadoop.io.Text;
DoubleColGreaterEqualLongScalar.class, DoubleColGreaterEqualDoubleScalar.class,
LongScalarGreaterEqualLongColumn.class, LongScalarGreaterEqualDoubleColumn.class,
DoubleScalarGreaterEqualLongColumn.class, DoubleScalarGreaterEqualDoubleColumn.class,
- StringColGreaterEqualStringColumn.class, StringColGreaterEqualStringScalar.class,
- StringScalarGreaterEqualStringColumn.class, FilterStringColGreaterEqualStringColumn.class,
- FilterStringColGreaterEqualStringScalar.class, FilterStringScalarGreaterEqualStringColumn.class,
+ StringGroupColGreaterEqualStringGroupColumn.class, FilterStringGroupColGreaterEqualStringGroupColumn.class,
+ StringGroupColGreaterEqualStringScalar.class,
+ StringGroupColGreaterEqualVarCharScalar.class, StringGroupColGreaterEqualCharScalar.class,
+ StringScalarGreaterEqualStringGroupColumn.class,
+ VarCharScalarGreaterEqualStringGroupColumn.class, CharScalarGreaterEqualStringGroupColumn.class,
+ FilterStringGroupColGreaterEqualStringScalar.class, FilterStringScalarGreaterEqualStringGroupColumn.class,
+ FilterStringGroupColGreaterEqualVarCharScalar.class, FilterVarCharScalarGreaterEqualStringGroupColumn.class,
+ FilterStringGroupColGreaterEqualCharScalar.class, FilterCharScalarGreaterEqualStringGroupColumn.class,
FilterLongColGreaterEqualLongColumn.class, FilterLongColGreaterEqualDoubleColumn.class,
FilterDoubleColGreaterEqualLongColumn.class, FilterDoubleColGreaterEqualDoubleColumn.class,
FilterLongColGreaterEqualLongScalar.class, FilterLongColGreaterEqualDoubleScalar.class,