You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/30 21:45:40 UTC
svn commit: r1477796 [1/2] - in /hive/branches/vectorization: ./ common/
common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ hcatalog/
jdbc/src/java/org/apache/hive/jdbc/ jdbc/src/test/org/apache/hive/jdbc/
ql/src/java/org/apache/hadoop/hive...
Author: hashutosh
Date: Tue Apr 30 19:45:38 2013
New Revision: 1477796
URL: http://svn.apache.org/r1477796
Log:
Merged in latest trunk
Added:
hive/branches/vectorization/data/files/array_table.txt
- copied unchanged from r1477793, hive/trunk/data/files/array_table.txt
hive/branches/vectorization/data/files/map_table.txt
- copied unchanged from r1477793, hive/trunk/data/files/map_table.txt
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
- copied unchanged from r1477793, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
hive/branches/vectorization/ql/src/test/queries/clientpositive/insert_overwrite_local_directory_1.q
- copied unchanged from r1477793, hive/trunk/ql/src/test/queries/clientpositive/insert_overwrite_local_directory_1.q
hive/branches/vectorization/ql/src/test/queries/clientpositive/ptf_register_tblfn.q
- copied unchanged from r1477793, hive/trunk/ql/src/test/queries/clientpositive/ptf_register_tblfn.q
hive/branches/vectorization/ql/src/test/results/clientpositive/insert_overwrite_local_directory_1.q.out
- copied unchanged from r1477793, hive/trunk/ql/src/test/results/clientpositive/insert_overwrite_local_directory_1.q.out
hive/branches/vectorization/ql/src/test/results/clientpositive/ptf_register_tblfn.q.out
- copied unchanged from r1477793, hive/trunk/ql/src/test/results/clientpositive/ptf_register_tblfn.q.out
Removed:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFFunctionInfo.java
Modified:
hive/branches/vectorization/ (props changed)
hive/branches/vectorization/common/build.xml
hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/branches/vectorization/conf/hive-default.xml.template
hive/branches/vectorization/hcatalog/build.xml
hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java
hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q
hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q
hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q
hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q
hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out
hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out
hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out
hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out
hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out
hive/branches/vectorization/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
Propchange: hive/branches/vectorization/
------------------------------------------------------------------------------
Merged /hive/trunk:r1476329-1477793
Modified: hive/branches/vectorization/common/build.xml
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/common/build.xml?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/common/build.xml (original)
+++ hive/branches/vectorization/common/build.xml Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ to call at top-level: ant deploy-contrib
<project name="common" default="jar">
<property name="src.dir" location="${basedir}/src/java"/>
+ <property name="src.gen.dir" location="${basedir}/src/gen"/>
<import file="../build-common.xml"/>
<target name="compile" depends="init, setup, ivy-retrieve">
@@ -36,7 +37,7 @@ to call at top-level: ant deploy-contrib
</exec>
<javac
encoding="${build.encoding}"
- srcdir="${src.dir}"
+ srcdir="${src.dir}:${src.gen.dir}"
includes="**/*.java"
destdir="${build.classes}"
debug="${javac.debug}"
Modified: hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Apr 30 19:45:38 2013
@@ -415,8 +415,6 @@ public class HiveConf extends Configurat
HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
- HIVEMAPJOINROWSIZE("hive.mapjoin.size.key", 10000),
- HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 25000),
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5),
HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),
Modified: hive/branches/vectorization/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/conf/hive-default.xml.template?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/conf/hive-default.xml.template (original)
+++ hive/branches/vectorization/conf/hive-default.xml.template Tue Apr 30 19:45:38 2013
@@ -595,12 +595,6 @@
</property>
<property>
- <name>hive.mapjoin.cache.numrows</name>
- <value>25000</value>
- <description>How many rows should be cached by jdbm for map join. </description>
-</property>
-
-<property>
<name>hive.optimize.skewjoin</name>
<value>false</value>
<description>Whether to enable skew join optimization.
Modified: hive/branches/vectorization/hcatalog/build.xml
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/hcatalog/build.xml?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/hcatalog/build.xml (original)
+++ hive/branches/vectorization/hcatalog/build.xml Tue Apr 30 19:45:38 2013
@@ -91,18 +91,23 @@
<target name="gen-test" description="Generate tests, a no-op for hcat"/>
<target name="test" depends="jar" description="run unit tests">
- <ant target="test" dir="core" inheritAll="false"/>
- <ant target="test" dir="hcatalog-pig-adapter" inheritAll="false"/>
- <ant target="test" dir="server-extensions" inheritAll="false"/>
- <ant target="test" dir="webhcat/svr" inheritAll="false"/>
- <ant target="test" dir="webhcat/java-client" inheritAll="false"/>
- <ant target="test" dir="storage-handlers/hbase" inheritAll="false"/>
- <!-- One checkstyle run for the whole repo. Runs after junit tests
- to piggyback on resolved jars. -->
- <path id="checkstyle.class.path">
- <fileset dir="core/build/lib/test"/>
- </path>
- <antcall target="checkstyle" inheritRefs="true"/>
+ <!-- Placed in a parallel structure so that the tests keep going
+ even if some fail. Otherwise a failure in one of the earlier ant
+ call terminates the target and the rest do not run. -->
+ <parallel threadCount="1">
+ <ant target="test" dir="core" inheritAll="false"/>
+ <ant target="test" dir="hcatalog-pig-adapter" inheritAll="false"/>
+ <ant target="test" dir="server-extensions" inheritAll="false"/>
+ <ant target="test" dir="webhcat/svr" inheritAll="false"/>
+ <ant target="test" dir="webhcat/java-client" inheritAll="false"/>
+ <ant target="test" dir="storage-handlers/hbase" inheritAll="false"/>
+ <!-- One checkstyle run for the whole repo. Runs after junit tests
+ to piggyback on resolved jars. -->
+ <path id="checkstyle.class.path">
+ <fileset dir="core/build/lib/test"/>
+ </path>
+ <antcall target="checkstyle" inheritRefs="true"/>
+ </parallel>
</target>
<target name="compile-test" depends="jar" description="compile unit tests">
Modified: hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java (original)
+++ hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java Tue Apr 30 19:45:38 2013
@@ -28,12 +28,16 @@ import java.util.Comparator;
import java.util.jar.Attributes;
import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hive.service.cli.GetInfoType;
+import org.apache.hive.service.cli.thrift.TCLIService;
import org.apache.hive.service.cli.thrift.TGetCatalogsReq;
import org.apache.hive.service.cli.thrift.TGetCatalogsResp;
import org.apache.hive.service.cli.thrift.TGetColumnsReq;
import org.apache.hive.service.cli.thrift.TGetColumnsResp;
import org.apache.hive.service.cli.thrift.TGetFunctionsReq;
import org.apache.hive.service.cli.thrift.TGetFunctionsResp;
+import org.apache.hive.service.cli.thrift.TGetInfoReq;
+import org.apache.hive.service.cli.thrift.TGetInfoResp;
import org.apache.hive.service.cli.thrift.TGetSchemasReq;
import org.apache.hive.service.cli.thrift.TGetSchemasResp;
import org.apache.hive.service.cli.thrift.TGetTableTypesReq;
@@ -42,7 +46,6 @@ import org.apache.hive.service.cli.thrif
import org.apache.hive.service.cli.thrift.TGetTablesResp;
import org.apache.hive.service.cli.thrift.TGetTypeInfoReq;
import org.apache.hive.service.cli.thrift.TGetTypeInfoResp;
-import org.apache.hive.service.cli.thrift.TCLIService;
import org.apache.hive.service.cli.thrift.TSessionHandle;
import org.apache.thrift.TException;
@@ -249,8 +252,17 @@ public class HiveDatabaseMetaData implem
}
public String getDatabaseProductVersion() throws SQLException {
- // TODO: Fetch this from the server side
- return "0.10.0";
+
+ TGetInfoReq req = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType());
+ TGetInfoResp resp;
+ try {
+ resp = client.GetInfo(req);
+ } catch (TException e) {
+ throw new SQLException(e.getMessage(), "08S01", e);
+ }
+ Utils.verifySuccess(resp.getStatus());
+
+ return resp.getInfoValue().getStringValue();
}
public int getDefaultTransactionIsolation() throws SQLException {
Modified: hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java (original)
+++ hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java Tue Apr 30 19:45:38 2013
@@ -35,11 +35,13 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.common.util.HiveVersionInfo;
/**
* TestJdbcDriver2
@@ -833,7 +835,11 @@ public class TestJdbcDriver2 extends Tes
DatabaseMetaData meta = con.getMetaData();
assertEquals("Hive", meta.getDatabaseProductName());
- assertEquals("0.10.0", meta.getDatabaseProductVersion());
+ assertEquals(HiveVersionInfo.getVersion(), meta.getDatabaseProductVersion());
+ assertEquals(System.getProperty("hive.version"), meta.getDatabaseProductVersion());
+ assertTrue("verifying hive version pattern. got " + meta.getDatabaseProductVersion(),
+ Pattern.matches("\\d+\\.\\d+\\.\\d+.*", meta.getDatabaseProductVersion()) );
+
assertEquals(DatabaseMetaData.sqlStateSQL99, meta.getSQLStateType());
assertFalse(meta.supportsCatalogsInTableDefinitions());
assertFalse(meta.supportsSchemasInTableDefinitions());
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java Tue Apr 30 19:45:38 2013
@@ -53,7 +53,6 @@ public abstract class AbstractMapJoinOpe
protected transient List<ObjectInspector>[] joinKeysStandardObjectInspectors;
protected transient byte posBigTable = -1; // one of the tables that is not in memory
- transient int mapJoinRowsKey; // rows for a given key
protected transient RowContainer<ArrayList<Object>> emptyList = null;
@@ -104,9 +103,6 @@ public abstract class AbstractMapJoinOpe
!hasFilter(posBigTable), reporter);
storage[posBigTable] = bigPosRC;
- mapJoinRowsKey = HiveConf.getIntVar(hconf,
- HiveConf.ConfVars.HIVEMAPJOINROWSIZE);
-
List<? extends StructField> structFields = ((StructObjectInspector) outputObjInspector)
.getAllStructFieldRefs();
if (conf.getOutputColumnNames().size() < structFields.size()) {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java Tue Apr 30 19:45:38 2013
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver;
+import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction;
/**
* FunctionInfo.
@@ -32,6 +34,8 @@ public class FunctionInfo {
private final boolean isNative;
+ private final boolean isInternalTableFunction;
+
private final String displayName;
private GenericUDF genericUDF;
@@ -40,11 +44,14 @@ public class FunctionInfo {
private GenericUDAFResolver genericUDAFResolver;
+ private Class<? extends TableFunctionResolver> tableFunctionResolver;
+
public FunctionInfo(boolean isNative, String displayName,
GenericUDF genericUDF) {
this.isNative = isNative;
this.displayName = displayName;
this.genericUDF = genericUDF;
+ this.isInternalTableFunction = false;
}
public FunctionInfo(boolean isNative, String displayName,
@@ -52,6 +59,7 @@ public class FunctionInfo {
this.isNative = isNative;
this.displayName = displayName;
this.genericUDAFResolver = genericUDAFResolver;
+ this.isInternalTableFunction = false;
}
public FunctionInfo(boolean isNative, String displayName,
@@ -59,6 +67,16 @@ public class FunctionInfo {
this.isNative = isNative;
this.displayName = displayName;
this.genericUDTF = genericUDTF;
+ this.isInternalTableFunction = false;
+ }
+
+ public FunctionInfo(String displayName, Class<? extends TableFunctionResolver> tFnCls)
+ {
+ this.displayName = displayName;
+ this.tableFunctionResolver = tFnCls;
+ PartitionTableFunctionDescription def = tableFunctionResolver.getAnnotation(PartitionTableFunctionDescription.class);
+ this.isNative = (def == null) ? false : def.isInternal();
+ this.isInternalTableFunction = isNative;
}
/**
@@ -90,6 +108,8 @@ public class FunctionInfo {
return genericUDAFResolver;
}
+
+
/**
* Get the Class of the UDF.
*/
@@ -109,6 +129,9 @@ public class FunctionInfo {
} else if (isGenericUDTF()) {
return genericUDTF.getClass();
}
+ if(isTableFunction()) {
+ return this.tableFunctionResolver;
+ }
return null;
}
@@ -131,6 +154,14 @@ public class FunctionInfo {
}
/**
+ * Internal table functions cannot be used in the language.
+ * {@link WindowingTableFunction}
+ */
+ public boolean isInternalTableFunction() {
+ return isInternalTableFunction;
+ }
+
+ /**
* @return TRUE if the function is a GenericUDF
*/
public boolean isGenericUDF() {
@@ -150,4 +181,11 @@ public class FunctionInfo {
public boolean isGenericUDTF() {
return null != genericUDTF;
}
+
+ /**
+ * @return TRUE if the function is a Table Function
+ */
+ public boolean isTableFunction() {
+ return null != tableFunctionResolver;
+ }
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Tue Apr 30 19:45:38 2013
@@ -182,7 +182,6 @@ public final class FunctionRegistry {
public static final String NOOP_TABLE_FUNCTION = "noop";
public static final String NOOP_MAP_TABLE_FUNCTION = "noopwithmap";
- static Map<String, PTFFunctionInfo> tableFunctions = Collections.synchronizedMap(new LinkedHashMap<String, PTFFunctionInfo>());
static Map<String, WindowFunctionInfo> windowFunctions = Collections.synchronizedMap(new LinkedHashMap<String, WindowFunctionInfo>());
/*
@@ -1294,6 +1293,9 @@ public final class FunctionRegistry {
FunctionRegistry.registerTemporaryGenericUDAF(
functionName, (GenericUDAFResolver)
ReflectionUtils.newInstance(udfClass, null));
+ } else if(TableFunctionResolver.class.isAssignableFrom(udfClass)) {
+ FunctionRegistry.registerTableFunction(
+ functionName, (Class<? extends TableFunctionResolver>)udfClass);
} else {
return false;
}
@@ -1406,14 +1408,17 @@ public final class FunctionRegistry {
public static boolean isTableFunction(String name)
{
- PTFFunctionInfo tFInfo = tableFunctions.get(name.toLowerCase());
- return tFInfo != null && !tFInfo.isInternal();
+ FunctionInfo tFInfo = mFunctions.get(name.toLowerCase());
+ return tFInfo != null && !tFInfo.isInternalTableFunction() && tFInfo.isTableFunction();
}
public static TableFunctionResolver getTableFunctionResolver(String name)
{
- PTFFunctionInfo tfInfo = tableFunctions.get(name.toLowerCase());
- return (TableFunctionResolver) ReflectionUtils.newInstance(tfInfo.getFunctionResolver(), null);
+ FunctionInfo tfInfo = mFunctions.get(name.toLowerCase());
+ if(tfInfo.isTableFunction()) {
+ return (TableFunctionResolver) ReflectionUtils.newInstance(tfInfo.getFunctionClass(), null);
+ }
+ return null;
}
public static TableFunctionResolver getWindowingTableFunction()
@@ -1428,8 +1433,8 @@ public final class FunctionRegistry {
public static void registerTableFunction(String name, Class<? extends TableFunctionResolver> tFnCls)
{
- PTFFunctionInfo tInfo = new PTFFunctionInfo(name, tFnCls);
- tableFunctions.put(name.toLowerCase(), tInfo);
+ FunctionInfo tInfo = new FunctionInfo(name, tFnCls);
+ mFunctions.put(name.toLowerCase(), tInfo);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Tue Apr 30 19:45:38 2013
@@ -151,35 +151,13 @@ public class GroupByOperator extends Ope
private List<FastBitSet> groupingSetsBitSet;
transient private List<Object> newKeysGroupingSets;
- /**
- * This is used to store the position and field names for variable length
- * fields.
- **/
- class varLenFields {
- int aggrPos;
- List<Field> fields;
-
- varLenFields(int aggrPos, List<Field> fields) {
- this.aggrPos = aggrPos;
- this.fields = fields;
- }
-
- int getAggrPos() {
- return aggrPos;
- }
-
- List<Field> getFields() {
- return fields;
- }
- };
-
// for these positions, some variable primitive type (String) is used, so size
// cannot be estimated. sample it at runtime.
transient List<Integer> keyPositionsSize;
// for these positions, some variable primitive type (String) is used for the
// aggregation classes
- transient List<varLenFields> aggrPositions;
+ transient List<Field>[] aggrPositions;
transient int fixedRowSize;
transient long maxHashTblMemory;
@@ -383,7 +361,7 @@ public class GroupByOperator extends Ope
aggregations = newAggregations();
hashAggr = true;
keyPositionsSize = new ArrayList<Integer>();
- aggrPositions = new ArrayList<varLenFields>();
+ aggrPositions = new List[aggregations.length];
groupbyMapAggrInterval = HiveConf.getIntVar(hconf,
HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
@@ -523,21 +501,10 @@ public class GroupByOperator extends Ope
}
if (c.isInstance(new String()) || c.isInstance(new ByteArrayRef())) {
- int idx = 0;
- varLenFields v = null;
- for (idx = 0; idx < aggrPositions.size(); idx++) {
- v = aggrPositions.get(idx);
- if (v.getAggrPos() == pos) {
- break;
- }
+ if (aggrPositions[pos] == null) {
+ aggrPositions[pos] = new ArrayList<Field>();
}
-
- if (idx == aggrPositions.size()) {
- v = new varLenFields(pos, new ArrayList<Field>());
- aggrPositions.add(v);
- }
-
- v.getFields().add(f);
+ aggrPositions[pos].add(f);
return javaObjectOverHead;
}
@@ -582,9 +549,11 @@ public class GroupByOperator extends Ope
for (int i = 0; i < aggregationEvaluators.length; i++) {
fixedRowSize += javaObjectOverHead;
- Class<? extends AggregationBuffer> agg = aggregationEvaluators[i]
- .getNewAggregationBuffer().getClass();
- Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg);
+ AggregationBuffer agg = aggregationEvaluators[i].getNewAggregationBuffer();
+ if (GenericUDAFEvaluator.isEstimable(agg)) {
+ continue;
+ }
+ Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg.getClass());
for (Field f : fArr) {
fixedRowSize += getSize(i, f.getType(), f);
}
@@ -968,29 +937,15 @@ public class GroupByOperator extends Ope
}
}
- AggregationBuffer[] aggs = null;
- if (aggrPositions.size() > 0) {
- KeyWrapper newKeyProber = newKeys.copyKey();
- aggs = hashAggregations.get(newKeyProber);
- }
-
- for (varLenFields v : aggrPositions) {
- int aggrPos = v.getAggrPos();
- List<Field> fieldsVarLen = v.getFields();
- AggregationBuffer agg = aggs[aggrPos];
-
- try {
- for (Field f : fieldsVarLen) {
- Object o = f.get(agg);
- if (o instanceof String){
- totalVariableSize += ((String)o).length();
- }
- else if (o instanceof ByteArrayRef){
- totalVariableSize += ((ByteArrayRef)o).getData().length;
- }
- }
- } catch (IllegalAccessException e) {
- assert false;
+ AggregationBuffer[] aggs = hashAggregations.get(newKeys);
+ for (int i = 0; i < aggs.length; i++) {
+ AggregationBuffer agg = aggs[i];
+ if (GenericUDAFEvaluator.isEstimable(agg)) {
+ totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer)agg).estimate();
+ continue;
+ }
+ if (aggrPositions[i] != null) {
+ totalVariableSize += estimateSize(agg, aggrPositions[i]);
}
}
@@ -1010,6 +965,24 @@ public class GroupByOperator extends Ope
return false;
}
+ private int estimateSize(AggregationBuffer agg, List<Field> fields) {
+ int length = 0;
+ for (Field f : fields) {
+ try {
+ Object o = f.get(agg);
+ if (o instanceof String){
+ length += ((String)o).length();
+ }
+ else if (o instanceof ByteArrayRef){
+ length += ((ByteArrayRef)o).getData().length;
+ }
+ } catch (Exception e) {
+ // continue.. null out the field?
+ }
+ }
+ return length;
+ }
+
private void flush(boolean complete) throws HiveException {
countAfterReport = 0;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Apr 30 19:45:38 2013
@@ -70,7 +70,6 @@ public class HashTableSinkOperator exten
protected transient List<ObjectInspector>[] joinKeysStandardObjectInspectors;
protected transient int posBigTableAlias = -1; // one of the tables that is not in memory
- transient int mapJoinRowsKey; // rows for a given key
protected transient RowContainer<ArrayList<Object>> emptyList = null;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Tue Apr 30 19:45:38 2013
@@ -1385,21 +1385,31 @@ public class RCFile {
try {
seek(position + 4); // skip escape
- in.readFully(syncCheck);
- int syncLen = sync.length;
- for (int i = 0; in.getPos() < end; i++) {
- int j = 0;
- for (; j < syncLen; j++) {
- if (sync[j] != syncCheck[(i + j) % syncLen]) {
- break;
+
+ int prefix = sync.length;
+ int n = conf.getInt("io.bytes.per.checksum", 512);
+ byte[] buffer = new byte[prefix+n];
+ n = (int)Math.min(n, end - in.getPos());
+ /* fill array with a pattern that will never match sync */
+ Arrays.fill(buffer, (byte)(~sync[0]));
+ while(n > 0 && (in.getPos() + n) <= end) {
+ position = in.getPos();
+ in.readFully(buffer, prefix, n);
+ /* the buffer has n+sync bytes */
+ for(int i = 0; i < n; i++) {
+ int j;
+ for(j = 0; j < sync.length && sync[j] == buffer[i+j]; j++) {
+ /* nothing */
+ }
+ if(j == sync.length) {
+ /* simplified from (position + (i - prefix) + sync.length) - SYNC_SIZE */
+ in.seek(position + i - SYNC_SIZE);
+ return;
}
}
- if (j == syncLen) {
- in.seek(in.getPos() - SYNC_SIZE); // position before
- // sync
- return;
- }
- syncCheck[i % syncLen] = in.readByte();
+ /* move the last 16 bytes to the prefix area */
+ System.arraycopy(buffer, buffer.length - prefix - 1, buffer, 0, prefix);
+ n = (int)Math.min(n, end - in.getPos());
}
} catch (ChecksumException e) { // checksum failure
handleChecksumException(e);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Tue Apr 30 19:45:38 2013
@@ -172,7 +172,7 @@ tableSample
tableSource
@init { gParent.msgs.push("table source"); }
@after { gParent.msgs.pop(); }
- : tabname=tableName (ts=tableSample)? (alias=identifier)?
+ : tabname=tableName (ts=tableSample)? (KW_AS? alias=identifier)?
-> ^(TOK_TABREF $tabname $ts? $alias?)
;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Tue Apr 30 19:45:38 2013
@@ -1878,7 +1878,7 @@ destination
@init { msgs.push("destination specification"); }
@after { msgs.pop(); }
:
- KW_LOCAL KW_DIRECTORY StringLiteral -> ^(TOK_LOCAL_DIR StringLiteral)
+ KW_LOCAL KW_DIRECTORY StringLiteral tableRowFormat? tableFileFormat? -> ^(TOK_LOCAL_DIR StringLiteral tableRowFormat? tableFileFormat?)
| KW_DIRECTORY StringLiteral -> ^(TOK_DIR StringLiteral)
| KW_TABLE tableOrPartition -> tableOrPartition
;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Tue Apr 30 19:45:38 2013
@@ -51,6 +51,7 @@ public class QB {
private boolean isQuery;
private boolean isAnalyzeRewrite;
private CreateTableDesc tblDesc = null; // table descriptor of the final
+ private CreateTableDesc localDirectoryDesc = null ;
// used by PTFs
/*
@@ -227,6 +228,14 @@ public class QB {
tblDesc = desc;
}
+ public CreateTableDesc getLLocalDirectoryDesc() {
+ return localDirectoryDesc;
+ }
+
+ public void setLocalDirectoryDesc(CreateTableDesc localDirectoryDesc) {
+ this.localDirectoryDesc = localDirectoryDesc;
+ }
+
/**
* Whether this QB is for a CREATE-TABLE-AS-SELECT.
*/
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Apr 30 19:45:38 2013
@@ -1192,6 +1192,10 @@ public class SemanticAnalyzer extends Ba
}
}
+ RowFormatParams rowFormatParams = new RowFormatParams();
+ AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars();
+ StorageFormat storageFormat = new StorageFormat();
+
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
@@ -1279,6 +1283,45 @@ public class SemanticAnalyzer extends Ba
}
qb.getMetaData().setDestForAlias(name, fname,
(ast.getToken().getType() == HiveParser.TOK_DIR));
+
+ CreateTableDesc localDirectoryDesc = new CreateTableDesc();
+ boolean localDirectoryDescIsSet = false;
+ int numCh = ast.getChildCount();
+ for (int num = 1; num < numCh ; num++){
+ ASTNode child = (ASTNode) ast.getChild(num);
+ if (ast.getChild(num) != null){
+ switch (child.getToken().getType()) {
+ case HiveParser.TOK_TABLEROWFORMAT:
+ rowFormatParams.analyzeRowFormat(shared, child);
+ localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
+ localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim);
+ localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
+ localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
+ localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
+ localDirectoryDescIsSet=true;
+ break;
+ case HiveParser.TOK_TABLESERIALIZER:
+ ASTNode serdeChild = (ASTNode) child.getChild(0);
+ shared.serde = unescapeSQLString(serdeChild.getChild(0).getText());
+ localDirectoryDesc.setSerName(shared.serde);
+ localDirectoryDescIsSet=true;
+ break;
+ case HiveParser.TOK_TBLSEQUENCEFILE:
+ case HiveParser.TOK_TBLTEXTFILE:
+ case HiveParser.TOK_TBLRCFILE:
+ case HiveParser.TOK_TBLORCFILE:
+ case HiveParser.TOK_TABLEFILEFORMAT:
+ storageFormat.fillStorageFormat(child, shared);
+ localDirectoryDesc.setOutputFormat(storageFormat.outputFormat);
+ localDirectoryDesc.setSerName(shared.serde);
+ localDirectoryDescIsSet=true;
+ break;
+ }
+ }
+ }
+ if (localDirectoryDescIsSet){
+ qb.setLocalDirectoryDesc(localDirectoryDesc);
+ }
break;
}
default:
@@ -5180,8 +5223,7 @@ public class SemanticAnalyzer extends Ba
String fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat);
} else {
- table_desc = PlanUtils.getDefaultTableDesc(Integer
- .toString(Utilities.ctrlaCode), cols, colTypes, false);
+ table_desc = PlanUtils.getDefaultTableDesc(qb.getLLocalDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Tue Apr 30 19:45:38 2013
@@ -100,6 +100,56 @@ public final class PlanUtils {
}
}
+ public static TableDesc getDefaultTableDesc(CreateTableDesc localDirectoryDesc,
+ String cols, String colTypes ) {
+ TableDesc tableDesc = getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols,
+ colTypes, false);;
+ if (localDirectoryDesc == null) {
+ return tableDesc;
+ }
+
+ try {
+ if (localDirectoryDesc.getFieldDelim() != null) {
+ tableDesc.getProperties().setProperty(
+ serdeConstants.FIELD_DELIM, localDirectoryDesc.getFieldDelim());
+ tableDesc.getProperties().setProperty(
+ serdeConstants.SERIALIZATION_FORMAT, localDirectoryDesc.getFieldDelim());
+ }
+ if (localDirectoryDesc.getLineDelim() != null) {
+ tableDesc.getProperties().setProperty(
+ serdeConstants.LINE_DELIM, localDirectoryDesc.getLineDelim());
+ }
+ if (localDirectoryDesc.getCollItemDelim() != null) {
+ tableDesc.getProperties().setProperty(
+ serdeConstants.COLLECTION_DELIM, localDirectoryDesc.getCollItemDelim());
+ }
+ if (localDirectoryDesc.getMapKeyDelim() != null) {
+ tableDesc.getProperties().setProperty(
+ serdeConstants.MAPKEY_DELIM, localDirectoryDesc.getMapKeyDelim());
+ }
+ if (localDirectoryDesc.getFieldEscape() !=null) {
+ tableDesc.getProperties().setProperty(
+ serdeConstants.ESCAPE_CHAR, localDirectoryDesc.getFieldEscape());
+ }
+ if (localDirectoryDesc.getSerName() != null) {
+ tableDesc.setSerdeClassName(localDirectoryDesc.getSerName());
+ tableDesc.getProperties().setProperty(
+ serdeConstants.SERIALIZATION_LIB, localDirectoryDesc.getSerName());
+ tableDesc.setDeserializerClass(
+ (Class<? extends Deserializer>) Class.forName(localDirectoryDesc.getSerName()));
+ }
+ if (localDirectoryDesc.getOutputFormat() != null){
+ tableDesc.setOutputFileFormatClass(Class.forName(localDirectoryDesc.getOutputFormat()));
+ }
+ } catch (ClassNotFoundException e) {
+ // Mimic the behaviour of tableDesc creation from a CreateTableDesc:
+ // return a null table description for the output if a class cannot be loaded.
+ e.printStackTrace();
+ return null;
+ }
+ return tableDesc;
+ }
+
/**
* Generate the table descriptor of MetadataTypedColumnsetSerDe with the
* separatorCode and column names (comma separated string).
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java Tue Apr 30 19:45:38 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UD
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -303,6 +304,14 @@ public class GenericUDAFAverage extends
}
}
+ @AggregationType(estimable = true)
+ static class AverageAgg extends AbstractAggregationBuffer {
+ long count;
+ double sum;
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES2 * 2; }
+ };
+
@Override
public void reset(AggregationBuffer aggregation) throws HiveException {
doReset((AverageAggregationBuffer<TYPE>)aggregation);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java Tue Apr 30 19:45:38 2013
@@ -147,7 +147,7 @@ public class GenericUDAFBridge extends A
}
/** class for storing UDAFEvaluator value. */
- static class UDAFAgg implements AggregationBuffer {
+ static class UDAFAgg extends AbstractAggregationBuffer {
UDAFEvaluator ueObject;
UDAFAgg(UDAFEvaluator ueObject) {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Tue Apr 30 19:45:38 2013
@@ -99,7 +99,7 @@ public class GenericUDAFCollectSet exten
}
}
- static class MkArrayAggregationBuffer implements AggregationBuffer {
+ static class MkArrayAggregationBuffer extends AbstractAggregationBuffer {
Set<Object> container;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Tue Apr 30 19:45:38 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -185,11 +186,17 @@ public class GenericUDAFComputeStats ext
foi);
}
- public static class BooleanStatsAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ public static class BooleanStatsAgg extends AbstractAggregationBuffer {
public String columnType; /* Datatype of column */
public long countTrues; /* Count of number of true values seen so far */
public long countFalses; /* Count of number of false values seen so far */
public long countNulls; /* Count of number of null values seen so far */
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive2() * 3 + model.lengthFor(columnType);
+ }
};
@Override
@@ -426,7 +433,9 @@ public class GenericUDAFComputeStats ext
}
}
- public static class LongStatsAgg implements AggregationBuffer {
+
+ @AggregationType(estimable = true)
+ public static class LongStatsAgg extends AbstractAggregationBuffer {
public String columnType;
public long min; /* Minimum value seen so far */
public long max; /* Maximum value seen so far */
@@ -434,6 +443,12 @@ public class GenericUDAFComputeStats ext
public LongNumDistinctValueEstimator numDV; /* Distinct value estimator */
public boolean firstItem; /* First item in the aggBuf? */
public int numBitVectors;
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive1() * 2 + model.primitive2() * 3 +
+ model.lengthFor(columnType) + model.lengthFor(numDV);
+ }
};
@Override
@@ -738,7 +753,8 @@ public class GenericUDAFComputeStats ext
}
}
- public static class DoubleStatsAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ public static class DoubleStatsAgg extends AbstractAggregationBuffer {
public String columnType;
public double min; /* Minimum value seen so far */
public double max; /* Maximum value seen so far */
@@ -746,6 +762,12 @@ public class GenericUDAFComputeStats ext
public DoubleNumDistinctValueEstimator numDV; /* Distinct value estimator */
public boolean firstItem; /* First item in the aggBuf? */
public int numBitVectors;
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive1() * 2 + model.primitive2() * 3 +
+ model.lengthFor(columnType) + model.lengthFor(numDV);
+ }
};
@Override
@@ -1061,7 +1083,8 @@ public class GenericUDAFComputeStats ext
}
}
- public static class StringStatsAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ public static class StringStatsAgg extends AbstractAggregationBuffer {
public String columnType;
public long maxLength; /* Maximum length seen so far */
public long sumLength; /* Sum of lengths of all values seen so far */
@@ -1070,6 +1093,12 @@ public class GenericUDAFComputeStats ext
public StringNumDistinctValueEstimator numDV; /* Distinct value estimator */
public int numBitVectors;
public boolean firstItem;
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive1() * 2 + model.primitive2() * 4 +
+ model.lengthFor(columnType) + model.lengthFor(numDV);
+ }
};
@Override
@@ -1377,12 +1406,18 @@ public class GenericUDAFComputeStats ext
}
}
- public static class BinaryStatsAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ public static class BinaryStatsAgg extends AbstractAggregationBuffer {
public String columnType;
public long maxLength; /* Maximum length seen so far */
public long sumLength; /* Sum of lengths of all values seen so far */
public long count; /* Count of all values seen so far */
public long countNulls; /* Count of number of null values seen so far */
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive2() * 4 + model.lengthFor(columnType);
+ }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java Tue Apr 30 19:45:38 2013
@@ -400,7 +400,7 @@ public class GenericUDAFContextNGrams im
// Aggregation buffer methods.
- static class NGramAggBuf implements AggregationBuffer {
+ static class NGramAggBuf extends AbstractAggregationBuffer {
ArrayList<String> context;
NGramEstimator nge;
};
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -247,13 +248,16 @@ public class GenericUDAFCorrelation exte
}
}
- static class StdAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class StdAgg extends AbstractAggregationBuffer {
long count; // number n of elements
double xavg; // average of x elements
double yavg; // average of y elements
double xvar; // n times the variance of x elements
double yvar; // n times the variance of y elements
double covar; // n times the covariance
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES2 * 6; }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java Tue Apr 30 19:45:38 2013
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -99,8 +100,11 @@ public class GenericUDAFCount implements
}
/** class for storing count value. */
- static class CountAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class CountAgg extends AbstractAggregationBuffer {
long value;
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES2; }
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -38,7 +39,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
/**
* Compute the covariance covar_pop(x, y), using the following one-pass method
@@ -224,11 +224,14 @@ public class GenericUDAFCovariance exten
}
}
- static class StdAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class StdAgg extends AbstractAggregationBuffer {
long count; // number n of elements
double xavg; // average of x elements
double yavg; // average of y elements
double covar; // n times the covariance
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES2 * 4; }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java Tue Apr 30 19:45:38 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
import javaewah.EWAHCompressedBitmap;
@@ -109,8 +108,13 @@ public class GenericUDAFEWAHBitmap exten
}
/** class for storing the current partial result aggregation */
- static class BitmapAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class BitmapAgg extends AbstractAggregationBuffer {
EWAHCompressedBitmap bitmap;
+ @Override
+ public int estimate() {
+ return bitmap.sizeInBytes();
+ }
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java Tue Apr 30 19:45:38 2013
@@ -41,6 +41,19 @@ import org.apache.hadoop.hive.serde2.obj
@UDFType(deterministic = true)
public abstract class GenericUDAFEvaluator implements Closeable {
+ public static @interface AggregationType {
+ boolean estimable() default false;
+ }
+
+ public static boolean isEstimable(AggregationBuffer buffer) {
+ if (buffer instanceof AbstractAggregationBuffer) {
+ Class<? extends AggregationBuffer> clazz = buffer.getClass();
+ AggregationType annotation = clazz.getAnnotation(AggregationType.class);
+ return annotation != null && annotation.estimable();
+ }
+ return false;
+ }
+
/**
* Mode.
*
@@ -123,10 +136,21 @@ public abstract class GenericUDAFEvaluat
*
* In the future, we may completely hide this class inside the Evaluator and
* use integer numbers to identify which aggregation we are looking at.
+ *
+ * @deprecated use {@link AbstractAggregationBuffer} instead
*/
public static interface AggregationBuffer {
};
+ public static abstract class AbstractAggregationBuffer implements AggregationBuffer {
+ /**
+ * Estimate the amount of memory occupied by the aggregation buffer.
+ * Currently, Hive assumes that each primitive type occupies 16 bytes and each Java object has
+ * 64 bytes of overhead. For a map, each entry also has 64 bytes of overhead.
+ */
+ public int estimate() { return -1; }
+ }
+
/**
* Get a new aggregation object.
*/
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java Tue Apr 30 19:45:38 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
import java.util.ArrayList;
import java.util.List;
-import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -27,21 +26,16 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.util.StringUtils;
/**
* Computes an approximate histogram of a numerical column using a user-specified number of bins.
@@ -235,8 +229,13 @@ public class GenericUDAFHistogramNumeric
// Aggregation buffer definition and manipulation methods
- static class StdAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class StdAgg extends AbstractAggregationBuffer {
NumericHistogram histogram; // the histogram object
+ @Override
+ public int estimate() {
+ return JavaDataModel.get().lengthFor(histogram);
+ }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java Tue Apr 30 19:45:38 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMax extends Abst
}
/** class for storing the current max value */
- static class MaxAgg implements AggregationBuffer {
+ static class MaxAgg extends AbstractAggregationBuffer {
Object o;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java Tue Apr 30 19:45:38 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMin extends Abst
}
/** class for storing the current min value */
- static class MinAgg implements AggregationBuffer {
+ static class MinAgg extends AbstractAggregationBuffer {
Object o;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java Tue Apr 30 19:45:38 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -33,15 +34,9 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.util.StringUtils;
/**
* Computes an approximate percentile (quantile) from an approximate histogram, for very
@@ -353,9 +348,16 @@ public class GenericUDAFPercentileApprox
// Aggregation buffer methods. We wrap GenericUDAFHistogramNumeric's aggregation buffer
// inside our own, so that we can also store requested quantile values between calls
- static class PercentileAggBuf implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class PercentileAggBuf extends AbstractAggregationBuffer {
NumericHistogram histogram; // histogram used for quantile approximation
double[] quantiles; // the quantiles requested
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.lengthFor(histogram) +
+ model.array() + JavaDataModel.PRIMITIVES2 * quantiles.length;
+ }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java Tue Apr 30 19:45:38 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -96,7 +97,8 @@ public class GenericUDAFSum extends Abst
}
/** class for storing decimal sum value. */
- static class SumHiveDecimalAgg implements AggregationBuffer {
+ @AggregationType(estimable = false) // hard to know exactly for decimals
+ static class SumHiveDecimalAgg extends AbstractAggregationBuffer {
boolean empty;
HiveDecimal sum;
}
@@ -188,9 +190,12 @@ public class GenericUDAFSum extends Abst
}
/** class for storing double sum value. */
- static class SumDoubleAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class SumDoubleAgg extends AbstractAggregationBuffer {
boolean empty;
double sum;
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
}
@Override
@@ -270,9 +275,12 @@ public class GenericUDAFSum extends Abst
}
/** class for storing double sum value. */
- static class SumLongAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class SumLongAgg extends AbstractAggregationBuffer {
boolean empty;
long sum;
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -171,10 +172,13 @@ public class GenericUDAFVariance extends
}
}
- static class StdAgg implements AggregationBuffer {
+ @AggregationType(estimable = true)
+ static class StdAgg extends AbstractAggregationBuffer {
long count; // number of elements
double sum; // sum of elements
double variance; // sum[x-avg^2] (this is actually n times the variance)
+ @Override
+ public int estimate() { return JavaDataModel.PRIMITIVES2 * 3; }
};
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java Tue Apr 30 19:45:38 2013
@@ -338,7 +338,7 @@ public class GenericUDAFnGrams implement
}
// Aggregation buffer methods.
- static class NGramAggBuf implements AggregationBuffer {
+ static class NGramAggBuf extends AbstractAggregationBuffer {
NGramEstimator nge;
int n;
};
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java Tue Apr 30 19:45:38 2013
@@ -313,4 +313,8 @@ public class NumericHistogram {
return result;
}
+
+ public int getNumBins() {
+ return bins == null ? 0 : bins.size();
+ }
}
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q Tue Apr 30 19:45:38 2013
@@ -3,6 +3,6 @@ EXPLAIN
LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE INPUT4;
LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE INPUT4;
EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4;
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4;
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias;
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q Tue Apr 30 19:45:38 2013
@@ -1,7 +1,3 @@
-set hive.mapjoin.cache.numrows = 2;
-
-
-
CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE;
explain
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q Tue Apr 30 19:45:38 2013
@@ -29,7 +29,6 @@ SORT BY src1.key, src1.value, src2.key,
SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20)
SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value;
-set hive.mapjoin.cache.numrows=2;
EXPLAIN
SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q Tue Apr 30 19:45:38 2013
@@ -1,5 +1,3 @@
-set hive.mapjoin.cache.numrows=100;
-
SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
FROM srcpart a
JOIN src b ON a.key = b.key where a.ds is not null;
Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out Tue Apr 30 19:45:38 2013
@@ -1,10 +1,5 @@
Saving all output to "!!{outputDirectory}!!/join39.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/join39.q
->>> set hive.mapjoin.cache.numrows = 2;
-No rows affected
->>>
->>>
->>>
>>> CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE;
No rows affected
>>>
Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out Tue Apr 30 19:45:38 2013
@@ -3205,8 +3205,6 @@ SORT BY src1.key, src1.value, src2.key,
'9','val_9','9','val_9','9','val_9'
548 rows selected
>>>
->>> set hive.mapjoin.cache.numrows=2;
-No rows affected
>>>
>>> EXPLAIN
SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value
Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out Tue Apr 30 19:45:38 2013
@@ -1,8 +1,5 @@
Saving all output to "!!{outputDirectory}!!/mapjoin1.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/mapjoin1.q
->>> set hive.mapjoin.cache.numrows=100;
-No rows affected
->>>
>>> SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
FROM srcpart a
JOIN src b ON a.key = b.key where a.ds is not null;
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out Tue Apr 30 19:45:38 2013
@@ -43,17 +43,17 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
POSTHOOK: type: LOAD
POSTHOOK: Output: default@input4
PREHOOK: query: EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
POSTHOOK: type: QUERY
-{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Split Sample:":{},"Alias -> Map Operator Tree:":{"input4":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) KEY)))))"}
-PREHOOK: query: SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Split Sample:":{},"Alias -> Map Operator Tree:":{"input4alias":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4) Input4Alias)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) KEY)))))"}
+PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
PREHOOK: type: QUERY
PREHOOK: Input: default@input4
#### A masked pattern was here ####
-POSTHOOK: query: SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+POSTHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
POSTHOOK: type: QUERY
POSTHOOK: Input: default@input4
#### A masked pattern was here ####
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out Tue Apr 30 19:45:38 2013
@@ -110,7 +110,10 @@ month
named_struct
negative
ngrams
+noop
+noopwithmap
not
+npath
ntile
nvl
or
@@ -174,6 +177,7 @@ var_samp
variance
weekofyear
when
+windowingtablefunction
xpath
xpath_boolean
xpath_double