Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/07 02:21:48 UTC
svn commit: r1616379 [1/4] - in /hive/branches/cbo: ./
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/
hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/
hcatalog/webhc...
Author: gunther
Date: Thu Aug 7 00:21:45 2014
New Revision: 1616379
URL: http://svn.apache.org/r1616379
Log:
Merge latest trunk into cbo branch (Gunther Hagleitner)
Added:
hive/branches/cbo/data/files/parquet_mixed_case
- copied unchanged from r1616375, hive/trunk/data/files/parquet_mixed_case
hive/branches/cbo/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java
- copied unchanged from r1616375, hive/trunk/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
- copied unchanged from r1616375, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
hive/branches/cbo/ql/src/test/queries/clientnegative/char_pad_convert_fail0.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientnegative/char_pad_convert_fail0.q
hive/branches/cbo/ql/src/test/queries/clientnegative/char_pad_convert_fail1.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientnegative/char_pad_convert_fail1.q
hive/branches/cbo/ql/src/test/queries/clientnegative/char_pad_convert_fail2.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientnegative/char_pad_convert_fail2.q
hive/branches/cbo/ql/src/test/queries/clientnegative/char_pad_convert_fail3.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientnegative/char_pad_convert_fail3.q
hive/branches/cbo/ql/src/test/queries/clientpositive/char_pad_convert.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientpositive/char_pad_convert.q
hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_mixed_case.q
- copied unchanged from r1616375, hive/trunk/ql/src/test/queries/clientpositive/parquet_mixed_case.q
hive/branches/cbo/ql/src/test/results/clientnegative/char_pad_convert_fail0.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientnegative/char_pad_convert_fail0.q.out
hive/branches/cbo/ql/src/test/results/clientnegative/char_pad_convert_fail1.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientnegative/char_pad_convert_fail1.q.out
hive/branches/cbo/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
hive/branches/cbo/ql/src/test/results/clientnegative/char_pad_convert_fail3.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientnegative/char_pad_convert_fail3.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/char_pad_convert.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/char_pad_convert.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/parquet_mixed_case.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/parquet_mixed_case.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_12.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_12.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_13.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_13.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_14.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_14.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_9.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_9.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
- copied unchanged from r1616375, hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
hive/branches/cbo/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
- copied unchanged from r1616375, hive/trunk/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
Modified:
hive/branches/cbo/ (props changed)
hive/branches/cbo/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/branches/cbo/conf/hive-default.xml.template
hive/branches/cbo/data/files/dept.txt
hive/branches/cbo/data/files/emp.txt
hive/branches/cbo/data/files/loc.txt
hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java
hive/branches/cbo/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java
hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java
hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProvider.java
hive/branches/cbo/itests/qtest/testconfiguration.properties
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q
hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q
hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q
hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q
hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q
hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q
hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q
hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_part.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_select.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_table.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_union.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/combine2.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/groupby_sort_11.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/input24.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/input25.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/metadataonly1.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/nullgroup3.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/parquet_columnar.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/union5.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/union7.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorization_15.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/udf_explode.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/udtf_explode.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union11.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union14.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union15.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union17.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union19.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union20.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union21.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union5.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/union7.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/vectorization_14.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/vectorization_15.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/vectorization_16.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/vectorization_9.q.out
hive/branches/cbo/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
hive/branches/cbo/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
hive/branches/cbo/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
hive/branches/cbo/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
hive/branches/cbo/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
Propchange: hive/branches/cbo/
------------------------------------------------------------------------------
Merged /hive/trunk:r1615870-1616375
Modified: hive/branches/cbo/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/common/src/java/org/apache/hadoop/hive/common/FileUtils.java (original)
+++ hive/branches/cbo/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Thu Aug 7 00:21:45 2014
@@ -22,6 +22,8 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.security.AccessControlException;
+import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
@@ -352,35 +354,47 @@ public final class FileUtils {
}
/**
- * Check if the given FileStatus indicates that the action is allowed for
- * userName. It checks the group and other permissions also to determine this.
- *
- * @param userName
- * @param fsStatus
- * @param action
- * @return true if it is writable for userName
- */
- public static boolean isActionPermittedForUser(String userName, FileStatus fsStatus, FsAction action) {
- FsPermission permissions = fsStatus.getPermission();
- // check user perm
- if (fsStatus.getOwner().equals(userName)
- && permissions.getUserAction().implies(action)) {
- return true;
- }
- // check other perm
- if (permissions.getOtherAction().implies(action)) {
- return true;
- }
- // check group perm after ensuring user belongs to the file owner group
- String fileGroup = fsStatus.getGroup();
- String[] userGroups = UserGroupInformation.createRemoteUser(userName).getGroupNames();
- for (String group : userGroups) {
- if (group.equals(fileGroup)) {
- // user belongs to the file group
- return permissions.getGroupAction().implies(action);
+ * Perform a check to determine if the user is able to access the file passed in.
+ * If the user name passed in is different from the current user, this method will
+ * attempt to impersonate the user to perform the check; the current user must be
+ * able to create proxy users in this case.
+ * @param fs FileSystem of the path to check
+ * @param stat FileStatus representing the file
+ * @param action FsAction that will be checked
+ * @param user User name of the user that will be checked for access. If the user name
+ * is null or the same as the current user, no user impersonation will be done
+ * and the check will be done as the current user. Otherwise the file access
+ * check will be performed within a doAs() block to use the access privileges
+ * of this user. In this case the current user must be configured to impersonate
+ * other users, otherwise this check will fail with an error.
+ * @param groups List of groups for the user
+ * @throws IOException
+ * @throws AccessControlException
+ * @throws InterruptedException
+ * @throws Exception
+ */
+ public static void checkFileAccessWithImpersonation(final FileSystem fs,
+ final FileStatus stat, final FsAction action, final String user)
+ throws IOException, AccessControlException, InterruptedException, Exception {
+ UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf());
+ String currentUser = ShimLoader.getHadoopShims().getShortUserName(ugi);
+
+ if (user == null || currentUser.equals(user)) {
+ // No need to impersonate user, do the checks as the currently configured user.
+ ShimLoader.getHadoopShims().checkFileAccess(fs, stat, action);
+ return;
+ }
+
+ // Otherwise, try user impersonation. Current user must be configured to do user impersonation.
+ UserGroupInformation proxyUser = ShimLoader.getHadoopShims().createProxyUser(user);
+ ShimLoader.getHadoopShims().doAs(proxyUser, new PrivilegedExceptionAction<Object>() {
+ @Override
+ public Object run() throws Exception {
+ FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
+ ShimLoader.getHadoopShims().checkFileAccess(fsAsUser, stat, action);
+ return null;
}
- }
- return false;
+ });
}
/**
@@ -395,7 +409,7 @@ public final class FileUtils {
* @throws IOException
*/
public static boolean isActionPermittedForFileHierarchy(FileSystem fs, FileStatus fileStatus,
- String userName, FsAction action) throws IOException {
+ String userName, FsAction action) throws Exception {
boolean isDir = fileStatus.isDir();
FsAction dirActionNeeded = action;
@@ -403,7 +417,11 @@ public final class FileUtils {
// for dirs user needs execute privileges as well
dirActionNeeded.and(FsAction.EXECUTE);
}
- if (!isActionPermittedForUser(userName, fileStatus, dirActionNeeded)) {
+
+ try {
+ checkFileAccessWithImpersonation(fs, fileStatus, action, userName);
+ } catch (AccessControlException err) {
+ // Action not permitted for user
return false;
}
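The new checkFileAccessWithImpersonation() wraps the standard Hadoop proxy-user
pattern: when the requested user differs from the current one, the check runs
inside a doAs() block. A minimal caller sketch, assuming a configured Hadoop
client on the classpath; the path and user name here are hypothetical:

    import java.security.AccessControlException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsAction;
    import org.apache.hadoop.hive.common.FileUtils;

    public class AccessCheckSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus stat = fs.getFileStatus(new Path("/tmp/some_table")); // hypothetical path
        try {
          // Runs as "bob" inside doAs() when "bob" is not the current user;
          // the current user must then be allowed to proxy other users.
          FileUtils.checkFileAccessWithImpersonation(fs, stat, FsAction.READ, "bob");
          System.out.println("READ permitted");
        } catch (AccessControlException ace) {
          // same denial signal the new isActionPermittedForFileHierarchy() catches
          System.out.println("READ denied: " + ace.getMessage());
        }
      }
    }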
Modified: hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Aug 7 00:21:45 2014
@@ -1104,7 +1104,9 @@ public class HiveConf extends Configurat
"Whether queries will fail because stats cannot be collected completely accurately. \n" +
"If this is set to true, reading/writing from/into a partition may fail because the stats\n" +
"could not be computed accurately."),
-
+ HIVE_STATS_COLLECT_PART_LEVEL_STATS("hive.analyze.stmt.collect.partlevel.stats", true,
+ "analyze table T compute statistics for columns. Queries like these should compute partition"
+ + "level stats for partitioned table even when no part spec is specified."),
HIVE_STATS_GATHER_NUM_THREADS("hive.stats.gather.num.threads", 10,
"Number of threads used by partialscan/noscan analyze command for partitioned tables.\n" +
"This is applicable only for file formats that implement StatsProvidingRecordReader (like ORC)."),
Modified: hive/branches/cbo/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/branches/cbo/conf/hive-default.xml.template?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/conf/hive-default.xml.template (original)
+++ hive/branches/cbo/conf/hive-default.xml.template Thu Aug 7 00:21:45 2014
@@ -1957,6 +1957,11 @@
</description>
</property>
<property>
+ <key>hive.analyze.stmt.collect.partlevel.stats</key>
+ <value>true</value>
+ <description>analyze table T compute statistics for columns. Queries like these should compute partition level stats for a partitioned table even when no part spec is specified.</description>
+ </property>
+ <property>
<key>hive.stats.gather.num.threads</key>
<value>10</value>
<description>
Modified: hive/branches/cbo/data/files/dept.txt
URL: http://svn.apache.org/viewvc/hive/branches/cbo/data/files/dept.txt?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/data/files/dept.txt (original)
+++ hive/branches/cbo/data/files/dept.txt Thu Aug 7 00:21:45 2014
@@ -2,3 +2,5 @@
33|engineering
34|clerical
35|marketing
+36|transport
+37|hr
Modified: hive/branches/cbo/data/files/emp.txt
URL: http://svn.apache.org/viewvc/hive/branches/cbo/data/files/emp.txt?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/data/files/emp.txt (original)
+++ hive/branches/cbo/data/files/emp.txt Thu Aug 7 00:21:45 2014
@@ -1,6 +1,48 @@
-Rafferty|31
-Jones|33
-Steinberg|33
-Robinson|34
-Smith|34
-John|
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
Modified: hive/branches/cbo/data/files/loc.txt
URL: http://svn.apache.org/viewvc/hive/branches/cbo/data/files/loc.txt?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/data/files/loc.txt (original)
+++ hive/branches/cbo/data/files/loc.txt Thu Aug 7 00:21:45 2014
@@ -1,8 +1,8 @@
-OH|31|43201|2001
-IO|32|43202|2001
-CA|35|43809|2001
-FL|33|54342|2001
-UT|35||2001
-CA|35|43809|2001
-|34|40000|
-FL|33|54342|2001
+OH|1|43201|2001
+IO|2|43202|2001
+CA|5|43809|2001
+FL|3|54342|2001
+UT|5||2001
+CA|5|43809|2001
+|4|40000|
+FL|6|54342|2001
Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java (original)
+++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java Thu Aug 7 00:21:45 2014
@@ -32,7 +32,11 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,7 +45,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
@@ -568,12 +571,16 @@ public class HCatCreateTableDesc {
desc.fileFormat = fileFormat;
if ("SequenceFile".equalsIgnoreCase(fileFormat)) {
desc.inputformat = SequenceFileInputFormat.class.getName();
- desc.outputformat = SequenceFileOutputFormat.class
+ desc.outputformat = HiveSequenceFileOutputFormat.class
.getName();
} else if ("RCFile".equalsIgnoreCase(fileFormat)) {
desc.inputformat = RCFileInputFormat.class.getName();
desc.outputformat = RCFileOutputFormat.class.getName();
desc.serde = ColumnarSerDe.class.getName();
+ } else if ("orcfile".equalsIgnoreCase(fileFormat)) {
+ desc.inputformat = OrcInputFormat.class.getName();
+ desc.outputformat = OrcOutputFormat.class.getName();
+ desc.serde = OrcSerde.class.getName();
}
desc.storageHandler = StringUtils.EMPTY;
} else if (!StringUtils.isEmpty(storageHandler)) {
@@ -583,7 +590,7 @@ public class HCatCreateTableDesc {
LOG.info("Using text file format for the table.");
desc.inputformat = TextInputFormat.class.getName();
LOG.info("Table input format:" + desc.inputformat);
- desc.outputformat = IgnoreKeyTextOutputFormat.class
+ desc.outputformat = HiveIgnoreKeyTextOutputFormat.class
.getName();
LOG.info("Table output format:" + desc.outputformat);
}
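With the change above, "orcfile" becomes a recognized fileFormat value in the
java-client API. A minimal sketch of creating an ORC-backed table, assuming a
reachable metastore; the database, table, and column names are illustrative:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hive.hcatalog.api.HCatClient;
    import org.apache.hive.hcatalog.api.HCatCreateTableDesc;
    import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;

    public class OrcTableSketch {
      public static void main(String[] args) throws Exception {
        HCatClient client = HCatClient.create(new Configuration());
        List<HCatFieldSchema> cols = Arrays.asList(
            new HCatFieldSchema("id", HCatFieldSchema.Type.INT, "row id"));
        // "orcfile" (case-insensitive) now selects OrcInputFormat,
        // OrcOutputFormat and OrcSerde, mirroring the RCFile branch above.
        HCatCreateTableDesc desc = HCatCreateTableDesc
            .create("default", "orc_sketch", cols)
            .fileFormat("orcfile")
            .build();
        client.createTable(desc);
        client.close();
      }
    }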
Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java (original)
+++ hive/branches/cbo/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java Thu Aug 7 00:21:45 2014
@@ -30,9 +30,12 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
@@ -119,6 +122,7 @@ public class TestHCatClient {
String db = "testdb";
String tableOne = "testTable1";
String tableTwo = "testTable2";
+ String tableThree = "testTable3";
HCatClient client = HCatClient.create(new Configuration(hcatConf));
client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE);
@@ -170,7 +174,7 @@ public class TestHCatClient {
assertTrue(table2.getInputFileFormat().equalsIgnoreCase(
TextInputFormat.class.getName()));
assertTrue(table2.getOutputFileFormat().equalsIgnoreCase(
- IgnoreKeyTextOutputFormat.class.getName()));
+ HiveIgnoreKeyTextOutputFormat.class.getName()));
assertTrue("SerdeParams not found", table2.getSerdeParams() != null);
assertEquals("checking " + serdeConstants.FIELD_DELIM, Character.toString('\001'),
table2.getSerdeParams().get(serdeConstants.FIELD_DELIM));
@@ -186,6 +190,19 @@ public class TestHCatClient {
table2.getSerdeParams().get(serdeConstants.SERIALIZATION_NULL_FORMAT));
assertEquals((expectedDir + "/" + db + ".db/" + tableTwo).toLowerCase(), table2.getLocation().toLowerCase());
+
+ HCatCreateTableDesc tableDesc3 = HCatCreateTableDesc.create(db,
+ tableThree, cols).fileFormat("orcfile").build();
+ client.createTable(tableDesc3);
+ HCatTable table3 = client.getTable(db, tableThree);
+ assertTrue(table3.getInputFileFormat().equalsIgnoreCase(
+ OrcInputFormat.class.getName()));
+ assertTrue(table3.getOutputFileFormat().equalsIgnoreCase(
+ OrcOutputFormat.class.getName()));
+ assertTrue(table3.getSerdeLib().equalsIgnoreCase(
+ OrcSerde.class.getName()));
+ assertTrue(table1.getCols().equals(cols));
+
client.close();
}
Modified: hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java Thu Aug 7 00:21:45 2014
@@ -72,6 +72,9 @@ public class TestMetastoreAuthorizationP
return DefaultHiveMetastoreAuthorizationProvider.class.getName();
}
+ protected HiveConf createHiveConf() throws Exception {
+ return new HiveConf(this.getClass());
+ }
@Override
protected void setUp() throws Exception {
@@ -92,7 +95,7 @@ public class TestMetastoreAuthorizationP
MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
- clientHiveConf = new HiveConf(this.getClass());
+ clientHiveConf = createHiveConf();
// Turn off client-side authorization
clientHiveConf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED,false);
@@ -134,10 +137,23 @@ public class TestMetastoreAuthorizationP
return "smp_ms_tbl";
}
+ protected boolean isTestEnabled() {
+ return true;
+ }
+
+ protected String setupUser() {
+ return ugi.getUserName();
+ }
+
public void testSimplePrivileges() throws Exception {
+ if (!isTestEnabled()) {
+ System.out.println("Skipping test " + this.getClass().getName());
+ return;
+ }
+
String dbName = getTestDbName();
String tblName = getTestTableName();
- String userName = ugi.getUserName();
+ String userName = setupUser();
allowCreateDatabase(userName);
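Together, the three new hooks (createHiveConf, isTestEnabled, setupUser) let a
subclass swap in its own conf, skip the test where it cannot run, and perform
the privilege checks as a different user. A hypothetical subclass sketch; the
class name and override bodies are illustrative, not part of this commit:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.security.TestMetastoreAuthorizationProvider;
    import org.apache.hadoop.hive.shims.ShimLoader;

    public class TestMyMetastoreAuthorization extends TestMetastoreAuthorizationProvider {
      @Override
      protected HiveConf createHiveConf() throws Exception {
        HiveConf conf = new HiveConf(this.getClass());
        // hypothetical: point the test at a specially configured cluster here
        return conf;
      }

      @Override
      protected boolean isTestEnabled() {
        // hypothetical: only run on Hadoop 2, where the needed APIs exist
        return "0.23".equals(ShimLoader.getMajorVersion());
      }

      @Override
      protected String setupUser() {
        return "test_user"; // hypothetical: run the checks as another user
      }
    }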
Modified: hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProvider.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProvider.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProvider.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProvider.java Thu Aug 7 00:21:45 2014
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.security;
import java.net.URI;
+import java.security.AccessControlException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -81,7 +82,7 @@ public class TestStorageBasedMetastoreAu
setPermissions(location,"-rwxr--r--");
}
- private void setPermissions(String locn, String permissions) throws Exception {
+ protected void setPermissions(String locn, String permissions) throws Exception {
FileSystem fs = FileSystem.get(new URI(locn), clientHiveConf);
fs.setPermission(new Path(locn), FsPermission.valueOf(permissions));
}
@@ -89,7 +90,7 @@ public class TestStorageBasedMetastoreAu
@Override
protected void assertNoPrivileges(MetaException me){
assertNotNull(me);
- assertTrue(me.getMessage().indexOf("not permitted") != -1);
+ assertTrue(me.getMessage().indexOf("AccessControlException") != -1);
}
@Override
Modified: hive/branches/cbo/itests/qtest/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/qtest/testconfiguration.properties?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/itests/qtest/testconfiguration.properties (original)
+++ hive/branches/cbo/itests/qtest/testconfiguration.properties Thu Aug 7 00:21:45 2014
@@ -1,5 +1,5 @@
minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q
minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q
-minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,cbo_correctness.q
+minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,vector_decimal_aggregate.q,vector_left_outer_join.q,vectorization_12.q,vectorization_13.q,vectorization_14.q,vectorization_9.q,vectorization_part_project.q,vectorization_short_regress.q,vectorized_mapjoin.q,vectorized_nested_mapjoin.q,vectorized_shufflejoin.q,vectorized_timestamp_funcs.q,cbo_correctness.q
beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Thu Aug 7 00:21:45 2014
@@ -705,13 +705,9 @@ public abstract class CommonJoinOperator
}
if (allOne) {
- LOG.info("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.info("called genAllOneUniqueJoinObject");
} else {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
}
} else {
// does any result need to be emitted
@@ -753,17 +749,11 @@ public abstract class CommonJoinOperator
}
if (!hasEmpty && !mayHasMoreThanOne) {
- LOG.trace("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.trace("called genAllOneUniqueJoinObject");
} else if (!hasEmpty && !hasLeftSemiJoin) {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
} else {
- LOG.trace("calling genObject");
genJoinObject();
- LOG.trace("called genObject");
}
}
Arrays.fill(aliasFilterTags, (byte)0xff);
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Thu Aug 7 00:21:45 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -64,23 +65,9 @@ import org.apache.hadoop.hive.ql.plan.Un
*
*/
public final class OperatorFactory {
+ private static final List<OpTuple> opvec;
+ private static final List<OpTuple> vectorOpvec;
- /**
- * OpTuple.
- *
- * @param <T>
- */
- public static final class OpTuple<T extends OperatorDesc> {
- public Class<T> descClass;
- public Class<? extends Operator<T>> opClass;
-
- public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
- this.descClass = descClass;
- this.opClass = opClass;
- }
- }
-
- public static ArrayList<OpTuple> opvec;
static {
opvec = new ArrayList<OpTuple>();
opvec.add(new OpTuple<FilterDesc>(FilterDesc.class, FilterOperator.class));
@@ -116,7 +103,6 @@ public final class OperatorFactory {
MuxOperator.class));
}
- public static ArrayList<OpTuple> vectorOpvec;
static {
vectorOpvec = new ArrayList<OpTuple>();
vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
@@ -128,8 +114,20 @@ public final class OperatorFactory {
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
+ vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
}
+ private static final class OpTuple<T extends OperatorDesc> {
+ private final Class<T> descClass;
+ private final Class<? extends Operator<T>> opClass;
+
+ public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
+ this.descClass = descClass;
+ this.opClass = opClass;
+ }
+ }
+
+
public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,
VectorizationContext vContext) throws HiveException {
Class<T> descClass = (Class<T>) conf.getClass();
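The opvec/vectorOpvec tables are a simple descriptor-class-to-operator-class
registry, and the new entry makes ExtractDesc resolve to VectorExtractOperator.
A self-contained, illustrative sketch of that lookup pattern, using stand-in
classes rather than Hive's own:

    import java.util.ArrayList;
    import java.util.List;

    public class OpRegistrySketch {
      static class Desc {}                        // stands in for OperatorDesc
      static class ExtractDesc extends Desc {}    // stands in for ql.plan.ExtractDesc
      static class Op {}                          // stands in for Operator<T>
      static class VectorExtractOp extends Op {}  // stands in for VectorExtractOperator

      static final class OpTuple {
        final Class<? extends Desc> descClass;
        final Class<? extends Op> opClass;
        OpTuple(Class<? extends Desc> d, Class<? extends Op> o) {
          descClass = d;
          opClass = o;
        }
      }

      static final List<OpTuple> vectorOpvec = new ArrayList<OpTuple>();
      static {
        // mirrors the new registration of ExtractDesc -> VectorExtractOperator
        vectorOpvec.add(new OpTuple(ExtractDesc.class, VectorExtractOp.class));
      }

      static Op getVectorOperator(Desc conf) throws Exception {
        for (OpTuple t : vectorOpvec) {
          if (t.descClass == conf.getClass()) {
            return t.opClass.newInstance(); // reflective construction, as in the factory
          }
        }
        throw new IllegalArgumentException("no vector operator for " + conf.getClass());
      }

      public static void main(String[] args) throws Exception {
        // prints VectorExtractOp: the descriptor now maps to a vectorized operator
        System.out.println(getVectorOperator(new ExtractDesc()).getClass().getSimpleName());
      }
    }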
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Thu Aug 7 00:21:45 2014
@@ -25,6 +25,8 @@ import java.util.Arrays;
import java.util.List;
import java.util.Random;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -61,9 +63,20 @@ public class ReduceSinkOperator extends
PTFUtils.makeTransient(ReduceSinkOperator.class, "inputAliases", "valueIndex");
}
+ private static final Log LOG = LogFactory.getLog(ReduceSinkOperator.class.getName());
private static final long serialVersionUID = 1L;
- protected transient OutputCollector out;
+ private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance();
+
+ private transient ObjectInspector[] partitionObjectInspectors;
+ private transient ObjectInspector[] bucketObjectInspectors;
+ private transient int buckColIdxInKey;
+ private boolean firstRow;
+ private transient int tag;
+ private boolean skipTag = false;
+ private transient InspectableObject tempInspectableObject = new InspectableObject();
+ private transient int[] valueIndex; // index for value(+ from keys, - from values)
+ protected transient OutputCollector out;
/**
* The evaluators for the key columns. Key columns decide the sort order on
* the reducer side. Key columns are passed to the reducer in the "key".
@@ -84,38 +97,40 @@ public class ReduceSinkOperator extends
* Evaluators for bucketing columns. This is used to compute bucket number.
*/
protected transient ExprNodeEvaluator[] bucketEval = null;
-
- // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is
- // ready
+ // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
protected transient Serializer keySerializer;
protected transient boolean keyIsText;
protected transient Serializer valueSerializer;
- transient int tag;
protected transient byte[] tagByte = new byte[1];
- transient protected int numDistributionKeys;
- transient protected int numDistinctExprs;
- transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
- private boolean skipTag = false;
+ protected transient int numDistributionKeys;
+ protected transient int numDistinctExprs;
+ protected transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
protected transient boolean autoParallel = false;
-
- protected static final MurmurHash hash = (MurmurHash)MurmurHash.getInstance();
-
- private transient int[] valueIndex; // index for value(+ from keys, - from values)
-
- public void setInputAliases(String[] inputAliases) {
- this.inputAliases = inputAliases;
- }
-
- public String[] getInputAliases() {
- return inputAliases;
- }
-
- public void setOutputCollector(OutputCollector _out) {
- this.out = _out;
- }
-
// picks topN K:V pairs from input.
protected transient TopNHash reducerHash = new TopNHash();
+ protected transient HiveKey keyWritable = new HiveKey();
+ protected transient ObjectInspector keyObjectInspector;
+ protected transient ObjectInspector valueObjectInspector;
+ protected transient Object[] cachedValues;
+ protected transient List<List<Integer>> distinctColIndices;
+ protected transient Random random;
+ /**
+ * This two dimensional array holds key data and a corresponding Union object
+ * which contains the tag identifying the aggregate expression for distinct columns.
+ *
+ * If there is no distinct expression, cachedKeys is simply like this:
+ * cachedKeys[0] = [col0][col1]
+ *
+ * With two distinct expressions, a union(tag:key) is attached for each distinct expression:
+ * cachedKeys[0] = [col0][col1][0:dist1]
+ * cachedKeys[1] = [col0][col1][1:dist2]
+ *
+ * In this case, the child GBY evaluates distinct values with an expression like KEY.col2:0.dist1;
+ * see {@link ExprNodeColumnEvaluator}
+ */
+ // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
+ protected transient Object[][] cachedKeys;
+
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
try {
@@ -184,40 +199,12 @@ public class ReduceSinkOperator extends
firstRow = true;
initializeChildren(hconf);
} catch (Exception e) {
- e.printStackTrace();
+ String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
+ LOG.error(msg, e);
throw new RuntimeException(e);
}
}
- transient InspectableObject tempInspectableObject = new InspectableObject();
- protected transient HiveKey keyWritable = new HiveKey();
-
- protected transient ObjectInspector keyObjectInspector;
- protected transient ObjectInspector valueObjectInspector;
- transient ObjectInspector[] partitionObjectInspectors;
- transient ObjectInspector[] bucketObjectInspectors = null;
- transient int buckColIdxInKey;
-
- protected transient Object[] cachedValues;
- protected transient List<List<Integer>> distinctColIndices;
- /**
- * This two dimensional array holds key data and a corresponding Union object
- * which contains the tag identifying the aggregate expression for distinct columns.
- *
- * If there is no distict expression, cachedKeys is simply like this.
- * cachedKeys[0] = [col0][col1]
- *
- * with two distict expression, union(tag:key) is attatched for each distinct expression
- * cachedKeys[0] = [col0][col1][0:dist1]
- * cachedKeys[1] = [col0][col1][1:dist2]
- *
- * in this case, child GBY evaluates distict values with expression like KEY.col2:0.dist1
- * see {@link ExprNodeColumnEvaluator}
- */
- // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
- protected transient Object[][] cachedKeys;
- boolean firstRow;
- protected transient Random random;
/**
* Initializes array of ExprNodeEvaluator. Adds Union field for distinct
@@ -509,4 +496,16 @@ public class ReduceSinkOperator extends
public int[] getValueIndex() {
return valueIndex;
}
+
+ public void setInputAliases(String[] inputAliases) {
+ this.inputAliases = inputAliases;
+ }
+
+ public String[] getInputAliases() {
+ return inputAliases;
+ }
+
+ public void setOutputCollector(OutputCollector _out) {
+ this.out = _out;
+ }
}
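The cachedKeys layout documented above can be pictured concretely. An
illustrative sketch for a query with two distinct expressions; the values are
hypothetical, and StandardUnion is the serde2 union holder carrying the tag:

    import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;

    public class CachedKeysSketch {
      public static void main(String[] args) {
        Object col0 = "k0", col1 = "k1", d1 = "x", d2 = "y"; // hypothetical key/distinct values
        // Shape from the javadoc: one key row per distinct expression.
        Object[][] cachedKeys = {
            { col0, col1, new StandardUnion((byte) 0, d1) }, // [col0][col1][0:dist1]
            { col0, col1, new StandardUnion((byte) 1, d2) }, // [col0][col1][1:dist2]
        };
        System.out.println(cachedKeys.length + " key rows, one per distinct expression");
      }
    }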
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java Thu Aug 7 00:21:45 2014
@@ -250,7 +250,7 @@ public class ExecMapper extends MapReduc
+ used_memory);
}
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
mo.preorderMap(rps);
return;
} catch (Exception e) {
@@ -285,10 +285,10 @@ public class ExecMapper extends MapReduc
* reportStats.
*
*/
- public static class reportStats implements Operator.OperatorFunc {
- Reporter rp;
+ public static class ReportStats implements Operator.OperatorFunc {
+ private final Reporter rp;
- public reportStats(Reporter rp) {
+ public ReportStats(Reporter rp) {
this.rp = rp;
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Thu Aug 7 00:21:45 2014
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -306,7 +306,7 @@ public class ExecReducer extends MapRedu
}
reducer.close(abort);
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java Thu Aug 7 00:21:45 2014
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
@@ -225,7 +225,7 @@ public class MapRecordProcessor extends
if (isLogInfoEnabled) {
logCloseInfo();
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
mapOp.preorderMap(rps);
return;
} catch (Exception e) {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Thu Aug 7 00:21:45 2014
@@ -35,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.tez.tools.InputMerger;
@@ -136,7 +136,7 @@ public class ReduceRecordProcessor exte
reducer.setParentOperators(null); // clear out any parents as reducer is the
// root
isTagged = redWork.getNeedsTagging();
- vectorized = redWork.getVectorModeOn() != null;
+ vectorized = redWork.getVectorMode();
try {
keyTableDesc = redWork.getKeyDesc();
@@ -519,7 +519,7 @@ public class ReduceRecordProcessor exte
dummyOp.close(abort);
}
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Thu Aug 7 00:21:45 2014
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.serde2.Col
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.util.StringUtils;
+import parquet.column.ColumnDescriptor;
import parquet.hadoop.api.ReadSupport;
import parquet.io.api.RecordMaterializer;
import parquet.schema.MessageType;
@@ -46,8 +47,8 @@ public class DataWritableReadSupport ext
private static final String TABLE_SCHEMA = "table_schema";
public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA";
- public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
-
+ public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
+
/**
* From a string of column names (including Hive columns), return a list
* of column names
@@ -75,12 +76,16 @@ public class DataWritableReadSupport ext
final Map<String, String> contextMetadata = new HashMap<String, String>();
if (columns != null) {
final List<String> listColumns = getColumns(columns);
-
+ final Map<String, String> lowerCaseFileSchemaColumns = new HashMap<String,String>();
+ for (ColumnDescriptor c : fileSchema.getColumns()) {
+ lowerCaseFileSchemaColumns.put(c.getPath()[0].toLowerCase(), c.getPath()[0]);
+ }
final List<Type> typeListTable = new ArrayList<Type>();
- for (final String col : listColumns) {
+ for (String col : listColumns) {
+ col = col.toLowerCase();
// listColumns contains partition columns which are metadata only
- if (fileSchema.containsField(col)) {
- typeListTable.add(fileSchema.getType(col));
+ if (lowerCaseFileSchemaColumns.containsKey(col)) {
+ typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col)));
} else {
// below allows schema evolution
typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
@@ -93,10 +98,24 @@ public class DataWritableReadSupport ext
final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
final List<Type> typeListWanted = new ArrayList<Type>();
+ final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
for (final Integer idx : indexColumnsWanted) {
- typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
+ String col = listColumns.get(idx);
+ if (indexAccess) {
+ typeListWanted.add(tableSchema.getType(col));
+ } else {
+ col = col.toLowerCase();
+ if (lowerCaseFileSchemaColumns.containsKey(col)) {
+ typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col)));
+ } else {
+ // should never occur?
+ String msg = "Column " + col + " at index " + idx + " does not exist in " +
+ lowerCaseFileSchemaColumns;
+ throw new IllegalStateException(msg);
+ }
+ }
}
- requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
+ requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
typeListWanted), fileSchema, configuration);
return new ReadContext(requestedSchemaByUser, contextMetadata);
@@ -127,29 +146,24 @@ public class DataWritableReadSupport ext
}
final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
-
return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
-
+
/**
- * Determine the file column names based on the position within the requested columns and
+ * Determine the file column names based on the position within the requested columns and
* use that as the requested schema.
*/
- private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
+ private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
Configuration configuration) {
- if(configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
+ if (configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
-
List<Type> requestedTypes = new ArrayList<Type>();
-
for(Type t : requestedSchema.getFields()) {
int index = listColumns.indexOf(t.getName());
requestedTypes.add(fileSchema.getType(index));
}
-
requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
}
-
return requestedSchema;
}
}
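The DataWritableReadSupport changes make column resolution case-insensitive: Hive lower-cases column names, but a Parquet file written by another tool may carry mixed-case names, so the code builds a lower-case-to-original map from the file schema and resolves every requested column through it; columns the file lacks still fall back to an OPTIONAL BINARY placeholder for schema evolution. A self-contained sketch of that lookup, using plain strings in place of Parquet's ColumnDescriptor and MessageType:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    class CaseInsensitiveColumnResolver {
        private final Map<String, String> lowerToOriginal = new HashMap<String, String>();

        CaseInsensitiveColumnResolver(List<String> fileColumns) {
            // Remember the file's original casing, keyed by lower case.
            for (String c : fileColumns) {
                lowerToOriginal.put(c.toLowerCase(), c);
            }
        }

        // Returns the file's spelling of the column, or null if the file lacks it.
        String resolve(String hiveColumn) {
            return lowerToOriginal.get(hiveColumn.toLowerCase());
        }

        public static void main(String[] args) {
            CaseInsensitiveColumnResolver r =
                new CaseInsensitiveColumnResolver(Arrays.asList("userId", "EventTime"));
            System.out.println(r.resolve("userid"));   // userId
            System.out.println(r.resolve("missing"));  // null -> schema-evolution path
        }
    }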
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Thu Aug 7 00:21:45 2014
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -62,9 +63,12 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -107,6 +111,12 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.util.ReflectionUtils;
public class Vectorizer implements PhysicalPlanResolver {
@@ -256,7 +266,15 @@ public class Vectorizer implements Physi
class VectorizationDispatcher implements Dispatcher {
+ private PhysicalContext pctx;
+
+ private int keyColCount;
+ private int valueColCount;
+
public VectorizationDispatcher(PhysicalContext pctx) {
+ this.pctx = pctx;
+ keyColCount = 0;
+ valueColCount = 0;
}
@Override
@@ -270,6 +288,9 @@ public class Vectorizer implements Physi
for (BaseWork w: work.getAllWork()) {
if (w instanceof MapWork) {
convertMapWork((MapWork)w);
+ } else if (w instanceof ReduceWork) {
+ // We are only vectorizing Reduce under Tez.
+ convertReduceWork((ReduceWork)w);
}
}
}
@@ -283,6 +304,13 @@ public class Vectorizer implements Physi
}
}
+ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
+ + FileSinkOperator.getOperatorName()), np);
+ opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
+ + ReduceSinkOperator.getOperatorName()), np);
+ }
+
private boolean validateMapWork(MapWork mapWork) throws SemanticException {
// Validate the input format
@@ -297,11 +325,8 @@ public class Vectorizer implements Physi
}
}
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- ValidationNodeProcessor vnp = new ValidationNodeProcessor();
- opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
- + FileSinkOperator.getOperatorName()), vnp);
- opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
- + ReduceSinkOperator.getOperatorName()), vnp);
+ MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor();
+ addMapWorkRules(opRules, vnp);
Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
GraphWalker ogw = new DefaultGraphWalker(disp);
// iterate the mapper operator tree
@@ -320,14 +345,11 @@ public class Vectorizer implements Physi
}
private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
- LOG.info("Vectorizing task...");
+ LOG.info("Vectorizing MapWork...");
mapWork.setVectorMode(true);
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mapWork);
- opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" +
- ReduceSinkOperator.getOperatorName()), vnp);
- opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
- + FileSinkOperator.getOperatorName()), vnp);
+ MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork);
+ addMapWorkRules(opRules, vnp);
Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
GraphWalker ogw = new PreOrderWalker(disp);
// iterate the mapper operator tree
@@ -348,9 +370,114 @@ public class Vectorizer implements Physi
return;
}
+
+ private void convertReduceWork(ReduceWork reduceWork) throws SemanticException {
+ boolean ret = validateReduceWork(reduceWork);
+ if (ret) {
+ vectorizeReduceWork(reduceWork);
+ }
+ }
+
+ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+ try {
+ // Check key ObjectInspector.
+ ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
+ if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
+ keyColCount = keyStructObjectInspector.getAllStructFieldRefs().size();
+
+ // Tez doesn't use tagging...
+ if (reduceWork.getNeedsTagging()) {
+ return false;
+ }
+
+ // Check value ObjectInspector.
+ ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
+ if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+ valueColCount = valueStructObjectInspector.getAllStructFieldRefs().size();
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
+ return true;
+ }
+
+ private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", ExtractOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + ".*"), np);
+ }
+
+ private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+ // Validate input to ReduceWork.
+ if (!getOnlyStructObjectInspectors(reduceWork)) {
+ return false;
+ }
+ // Now check the reduce operator tree.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor();
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ // iterate the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException {
+ LOG.info("Vectorizing ReduceWork...");
+ reduceWork.setVectorMode(true);
+
+ // For some reason, the DefaultGraphWalker does not descend from the reducer Operator as expected.
+ // We need to descend; otherwise it breaks our algorithm that determines the VectorizationContext...
+ // So we use PreOrderWalker instead of DefaultGraphWalker.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkVectorizationNodeProcessor vnp = new ReduceWorkVectorizationNodeProcessor(reduceWork, keyColCount, valueColCount);
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderWalker(disp);
+ // iterate the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ LOG.info("vectorizeReduceWork reducer Operator: " + reduceWork.getReducer().getName() + "...");
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ // Necessary since we are vectorizing the root operator in reduce.
+ reduceWork.setReducer(vnp.getRootVectorOp());
+
+ Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
+ if (reducer.getType().equals(OperatorType.EXTRACT)) {
+ ((VectorExtractOperator)reducer).setKeyAndValueColCounts(keyColCount, valueColCount);
+ }
+
+ Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
+ reduceWork.setScratchColumnVectorTypes(columnVectorTypes);
+ Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+ reduceWork.setScratchColumnMap(columnMap);
+
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
+ LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+ }
+ }
}
- class ValidationNodeProcessor implements NodeProcessor {
+ class MapWorkValidationNodeProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
@@ -361,9 +488,9 @@ public class Vectorizer implements Physi
op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
return new Boolean(true);
}
- boolean ret = validateOperator(op);
+ boolean ret = validateMapWorkOperator(op);
if (!ret) {
- LOG.info("Operator: " + op.getName() + " could not be vectorized.");
+ LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
return new Boolean(false);
}
}
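Both validation and vectorization skip operators that sit below a GROUPBY, because vectorized GROUPBY currently emits plain rows rather than row batches; the nonVectorizableChildOfGroupBy helper added further below generalizes the single-parent check above into a full ancestor walk. A small sketch of that walk, on a hypothetical operator type with a single parent chain:

    enum OpType { TABLESCAN, GROUPBY, SELECT, REDUCESINK, FILESINK }

    class ChainOp {
        final OpType type;
        final ChainOp parent; // null at the root

        ChainOp(OpType type, ChainOp parent) {
            this.type = type;
            this.parent = parent;
        }

        // True when some ancestor is a GROUPBY, i.e. this operator consumes plain
        // rows, not vectorized batches, and therefore must not be vectorized.
        boolean hasGroupByAncestor() {
            for (ChainOp p = parent; p != null; p = p.parent) {
                if (p.type == OpType.GROUPBY) {
                    return true;
                }
            }
            return false;
        }
    }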
@@ -371,24 +498,37 @@ public class Vectorizer implements Physi
}
}
- class VectorizationNodeProcessor implements NodeProcessor {
+ class ReduceWorkValidationNodeProcessor implements NodeProcessor {
- private final MapWork mWork;
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ boolean ret = validateReduceWorkOperator(op);
+ if (!ret) {
+ LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
+ return new Boolean(false);
+ }
+ }
+ return new Boolean(true);
+ }
+ }
+
+ // This class has common code used by both MapWorkVectorizationNodeProcessor and
+ // ReduceWorkVectorizationNodeProcessor.
+ class VectorizationNodeProcessor implements NodeProcessor {
// This is used to extract scratch column types for each file key
- private final Map<String, VectorizationContext> scratchColumnContext =
+ protected final Map<String, VectorizationContext> scratchColumnContext =
new HashMap<String, VectorizationContext>();
- private final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
+ protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
new HashMap<Operator<? extends OperatorDesc>, VectorizationContext>();
- private final Set<Operator<? extends OperatorDesc>> opsDone =
+ protected final Set<Operator<? extends OperatorDesc>> opsDone =
new HashSet<Operator<? extends OperatorDesc>>();
- public VectorizationNodeProcessor(MapWork mWork) {
- this.mWork = mWork;
- }
-
public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
Map<String, Map<Integer, String>> scratchColumnVectorTypes =
new HashMap<String, Map<Integer, String>>();
@@ -411,16 +551,90 @@ public class Vectorizer implements Physi
return scratchColumnMap;
}
+ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack, Operator<? extends OperatorDesc> op)
+ throws SemanticException {
+ VectorizationContext vContext = null;
+ if (stack.size() <= 1) {
+ throw new SemanticException(String.format("Expected operator stack for operator %s to have at least 2 operators", op.getName()));
+ }
+ // Walk down the stack of operators until we find one willing to give us a context.
+ // At the bottom will be the root operator, guaranteed to have a context
+ int i = stack.size() - 2;
+ while (vContext == null) {
+ if (i < 0) {
+ throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+ }
+ Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
+ vContext = vContextsByTSOp.get(opParent);
+ --i;
+ }
+ return vContext;
+ }
+
+ public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
+ Operator<? extends OperatorDesc> currentOp = op;
+ while (currentOp.getParentOperators().size() > 0) {
+ currentOp = currentOp.getParentOperators().get(0);
+ if (currentOp.getType().equals(OperatorType.GROUPBY)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op, VectorizationContext vContext)
+ throws SemanticException {
+ Operator<? extends OperatorDesc> vectorOp = op;
+ try {
+ if (!opsDone.contains(op)) {
+ vectorOp = vectorizeOperator(op, vContext);
+ opsDone.add(op);
+ if (vectorOp != op) {
+ opsDone.add(vectorOp);
+ }
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+ vContextsByTSOp.put(op, vOutContext);
+ scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
+ }
+ }
+ } catch (HiveException e) {
+ throw new SemanticException(e);
+ }
+ return vectorOp;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ throw new SemanticException("Must be overridden");
+ }
+ }
+
+ class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final MapWork mWork;
+
+ public MapWorkVectorizationNodeProcessor(MapWork mWork) {
+ this.mWork = mWork;
+ }
+
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+ LOG.info("MapWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
VectorizationContext vContext = null;
if (op instanceof TableScanOperator) {
- vContext = getVectorizationContext((TableScanOperator) op, physicalContext);
+ vContext = getVectorizationContext(op, physicalContext);
for (String onefile : mWork.getPathToAliases().keySet()) {
List<String> aliases = mWork.getPathToAliases().get(onefile);
for (String alias : aliases) {
@@ -438,45 +652,76 @@ public class Vectorizer implements Physi
}
vContextsByTSOp.put(op, vContext);
} else {
- assert stack.size() > 1;
- // Walk down the stack of operators until we found one willing to give us a context.
- // At the bottom will be the TS operator, guaranteed to have a context
- int i= stack.size()-2;
- while (vContext == null) {
- Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
- vContext = vContextsByTSOp.get(opParent);
- --i;
- }
+ vContext = walkStackToFindVectorizationContext(stack, op);
}
assert vContext != null;
- if ((op.getType().equals(OperatorType.REDUCESINK) || op.getType().equals(OperatorType.FILESINK)) &&
- op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
- // No need to vectorize
- if (!opsDone.contains(op)) {
- opsDone.add(op);
- }
+ // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches. So, don't vectorize
+ // any operators below GROUPBY.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return null;
+ }
+
+ doVectorize(op, vContext);
+
+ return null;
+ }
+ }
+
+ class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final ReduceWork rWork;
+ private int keyColCount;
+ private int valueColCount;
+ private Map<String, Integer> reduceColumnNameMap;
+
+ private Operator<? extends OperatorDesc> rootVectorOp;
+
+ public Operator<? extends OperatorDesc> getRootVectorOp() {
+ return rootVectorOp;
+ }
+
+ public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, int valueColCount) {
+ this.rWork = rWork;
+ reduceColumnNameMap = rWork.getReduceColumnNameMap();
+ this.keyColCount = keyColCount;
+ this.valueColCount = valueColCount;
+ rootVectorOp = null;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+ LOG.info("ReduceWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
+
+ VectorizationContext vContext = null;
+
+ boolean saveRootVectorOp = false;
+
+ if (op.getParentOperators().size() == 0) {
+ vContext = getReduceVectorizationContext(reduceColumnNameMap);
+ vContextsByTSOp.put(op, vContext);
+ saveRootVectorOp = true;
} else {
- try {
- if (!opsDone.contains(op)) {
- Operator<? extends OperatorDesc> vectorOp =
- vectorizeOperator(op, vContext);
- opsDone.add(op);
- if (vectorOp != op) {
- opsDone.add(vectorOp);
- }
- if (vectorOp instanceof VectorizationContextRegion) {
- VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
- VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
- vContextsByTSOp.put(op, vOutContext);
- scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
- }
- }
- } catch (HiveException e) {
- throw new SemanticException(e);
- }
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ }
+
+ assert vContext != null;
+
+ // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches. So, don't vectorize
+ // any operators below GROUPBY.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return null;
}
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+ if (saveRootVectorOp && op != vectorOp) {
+ rootVectorOp = vectorOp;
+ }
+
return null;
}
}
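walkStackToFindVectorizationContext above hoists the inline loop the map-side processor used into shared code: starting at the operator's immediate parent (stack.size() - 2), it walks toward the root until some ancestor has registered a VectorizationContext, which the root operator is guaranteed to have done. A standalone sketch of that search, with type parameters standing in for Hive's Operator and VectorizationContext:

    import java.util.Map;
    import java.util.Stack;

    final class StackSearch {
        // Find the context registered by the nearest ancestor on the walk stack.
        // stack.get(stack.size() - 1) is the current node, so start one below it.
        static <N, C> C findNearestContext(Stack<N> stack, Map<N, C> contexts) {
            for (int i = stack.size() - 2; i >= 0; --i) {
                C ctx = contexts.get(stack.get(i));
                if (ctx != null) {
                    return ctx;
                }
            }
            return null; // the real method throws SemanticException here
        }
    }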
@@ -519,7 +764,7 @@ public class Vectorizer implements Physi
return pctx;
}
- boolean validateOperator(Operator<? extends OperatorDesc> op) {
+ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op) {
boolean ret = false;
switch (op.getType()) {
case MAPJOIN:
@@ -555,6 +800,32 @@ public class Vectorizer implements Physi
return ret;
}
+ boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case EXTRACT:
+ ret = validateExtractOperator((ExtractOperator) op);
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case FILESINK:
+ case LIMIT:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
SMBJoinDesc desc = op.getConf();
// Validation is the same as for map join, since the 'small' tables are not vectorized
@@ -617,6 +888,15 @@ public class Vectorizer implements Physi
return validateAggregationDesc(op.getConf().getAggregators());
}
+ private boolean validateExtractOperator(ExtractOperator op) {
+ ExprNodeDesc expr = op.getConf().getCol();
+ boolean ret = validateExprNodeDesc(expr);
+ if (!ret) {
+ return false;
+ }
+ return true;
+ }
+
private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
}
@@ -728,7 +1008,7 @@ public class Vectorizer implements Physi
return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
}
- private VectorizationContext getVectorizationContext(TableScanOperator op,
+ private VectorizationContext getVectorizationContext(Operator op,
PhysicalContext pctx) {
RowSchema rs = op.getSchema();
@@ -741,8 +1021,26 @@ public class Vectorizer implements Physi
}
}
- VectorizationContext vc = new VectorizationContext(cmap, columnCount);
- return vc;
+ return new VectorizationContext(cmap, columnCount);
+ }
+
+ private VectorizationContext getReduceVectorizationContext(Map<String, Integer> reduceColumnNameMap) {
+ return new VectorizationContext(reduceColumnNameMap, reduceColumnNameMap.size());
+ }
+
+ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op, Operator<? extends OperatorDesc> vectorOp) {
+ if (op.getParentOperators() != null) {
+ vectorOp.setParentOperators(op.getParentOperators());
+ for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
+ p.replaceChild(op, vectorOp);
+ }
+ }
+ if (op.getChildOperators() != null) {
+ vectorOp.setChildOperators(op.getChildOperators());
+ for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
+ c.replaceParent(op, vectorOp);
+ }
+ }
}
Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
@@ -757,6 +1055,7 @@ public class Vectorizer implements Physi
case FILESINK:
case REDUCESINK:
case LIMIT:
+ case EXTRACT:
vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
break;
default:
@@ -765,18 +1064,7 @@ public class Vectorizer implements Physi
}
if (vectorOp != op) {
- if (op.getParentOperators() != null) {
- vectorOp.setParentOperators(op.getParentOperators());
- for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
- p.replaceChild(op, vectorOp);
- }
- }
- if (op.getChildOperators() != null) {
- vectorOp.setChildOperators(op.getChildOperators());
- for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
- c.replaceParent(op, vectorOp);
- }
- }
+ fixupParentChildOperators(op, vectorOp);
((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
}
return vectorOp;
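The last hunks extract the parent/child rewiring into fixupParentChildOperators: when vectorizeOperator returns a replacement operator, the new node must inherit the original's parent and child lists, and every neighbor must swap its edge to point at the replacement, or the plan would keep routing rows through the unvectorized operator. A minimal sketch of that splice on a hypothetical doubly linked DAG node:

    import java.util.ArrayList;
    import java.util.List;

    class DagNode {
        List<DagNode> parents = new ArrayList<DagNode>();
        List<DagNode> children = new ArrayList<DagNode>();

        void replaceChild(DagNode oldChild, DagNode newChild) {
            children.set(children.indexOf(oldChild), newChild);
        }

        void replaceParent(DagNode oldParent, DagNode newParent) {
            parents.set(parents.indexOf(oldParent), newParent);
        }

        // Splice 'replacement' into the DAG where 'original' used to sit.
        static void swap(DagNode original, DagNode replacement) {
            replacement.parents = original.parents;
            for (DagNode p : replacement.parents) {
                p.replaceChild(original, replacement);
            }
            replacement.children = original.children;
            for (DagNode c : replacement.children) {
                c.replaceParent(original, replacement);
            }
        }
    }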