You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by am...@apache.org on 2013/06/07 14:41:21 UTC
svn commit: r1490614 - in /hive/branches/HIVE-4115: ./ data/files/
hbase-handler/src/test/templates/ hcatalog/bin/
hcatalog/src/test/e2e/hcatalog/drivers/ hcatalog/src/test/e2e/hcatalog/tests/
ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/or...
Author: amareshwari
Date: Fri Jun 7 12:41:20 2013
New Revision: 1490614
URL: http://svn.apache.org/r1490614
Log:
Merging r1489797 through r1490612 into HIVE-4115
Added:
hive/branches/HIVE-4115/data/files/person age.txt
- copied unchanged from r1490612, hive/trunk/data/files/person age.txt
hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/load_file_with_space_in_the_name.q
- copied unchanged from r1490612, hive/trunk/ql/src/test/queries/clientpositive/load_file_with_space_in_the_name.q
hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q
- copied unchanged from r1490612, hive/trunk/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_file_with_space_in_the_name.q.out
- copied unchanged from r1490612, hive/trunk/ql/src/test/results/clientpositive/load_file_with_space_in_the_name.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_hdfs_file_with_space_in_the_name.q.out
- copied unchanged from r1490612, hive/trunk/ql/src/test/results/clientpositive/load_hdfs_file_with_space_in_the_name.q.out
Modified:
hive/branches/HIVE-4115/ (props changed)
hive/branches/HIVE-4115/build-common.xml
hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseCliDriver.vm
hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseNegativeCliDriver.vm
hive/branches/HIVE-4115/hcatalog/bin/hcat
hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHiveCmdLine.pm
hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_cmdline.conf
hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_nightly.conf
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/combine2_win.q
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/combine2_win.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_double.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_long.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_string.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/input_part10_win.q.out
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_dyn_part14_win.q.out
Propchange: hive/branches/HIVE-4115/
------------------------------------------------------------------------------
Merged /hive/trunk:r1489797-1490612
Modified: hive/branches/HIVE-4115/build-common.xml
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/build-common.xml?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/build-common.xml (original)
+++ hive/branches/HIVE-4115/build-common.xml Fri Jun 7 12:41:20 2013
@@ -59,7 +59,7 @@
<property name="test.output" value="true"/>
<property name="test.junit.output.format" value="xml"/>
<property name="test.junit.output.usefile" value="true"/>
- <property name="minimr.query.files" value="list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,schemeAuthority.q,truncate_column_buckets.q,remote_script.q"/>
+ <property name="minimr.query.files" value="list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,schemeAuthority.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q"/>
<property name="minimr.query.negative.files" value="cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q" />
<property name="test.silent" value="true"/>
<property name="hadoopVersion" value="${hadoop.version.ant-internal}"/>
Modified: hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseCliDriver.vm
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseCliDriver.vm?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseCliDriver.vm (original)
+++ hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseCliDriver.vm Fri Jun 7 12:41:20 2013
@@ -89,7 +89,7 @@ public class $className extends TestCase
#set ($fname = $qf.getName())
#set ($eidx = $fname.indexOf('.'))
#set ($tname = $fname.substring(0, $eidx))
- #set ($fpath = $qf.getCanonicalPath())
+ #set ($fpath = $qf.getCanonicalPath().replaceAll("\\","\\\\"))
public void testCliDriver_$tname() throws Exception {
runTest("$tname", "$fname", "$fpath");
}
Modified: hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseNegativeCliDriver.vm
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseNegativeCliDriver.vm?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseNegativeCliDriver.vm (original)
+++ hive/branches/HIVE-4115/hbase-handler/src/test/templates/TestHBaseNegativeCliDriver.vm Fri Jun 7 12:41:20 2013
@@ -68,7 +68,7 @@ public class $className extends TestCase
#set ($fname = $qf.getName())
#set ($eidx = $fname.indexOf('.'))
#set ($tname = $fname.substring(0, $eidx))
- #set ($fpath = $qf.getCanonicalPath())
+ #set ($fpath = $qf.getCanonicalPath().replaceAll("\\","\\\\"))
public void testCliDriver_$tname() throws Exception {
runTest("$tname", "$fname", "$fpath");
}
Modified: hive/branches/HIVE-4115/hcatalog/bin/hcat
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hcatalog/bin/hcat?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hcatalog/bin/hcat (original)
+++ hive/branches/HIVE-4115/hcatalog/bin/hcat Fri Jun 7 12:41:20 2013
@@ -91,7 +91,7 @@ if [ ! -d "$HIVE_LIB_DIR" ]; then
exit 4;
fi
-HIVE_CONF_DIR=${HIVE_HOME}/conf
+HIVE_CONF_DIR=${HIVE_CONF_DIR:-$HIVE_HOME/conf}
if [ ! -d "$HIVE_CONF_DIR" ]; then
echo "Cannot find conf dir within HIVE_HOME : $HIVE_CONF_DIR";
exit 4;
Modified: hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHiveCmdLine.pm
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHiveCmdLine.pm?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHiveCmdLine.pm (original)
+++ hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHiveCmdLine.pm Fri Jun 7 12:41:20 2013
@@ -1,26 +1,30 @@
-package TestDriverHiveCmdLine;
+#!/usr/bin/env perl
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+################################################################################
+
-###############################################################################
# Test driver for hive nightly tests.
#
#
+package TestDriverHiveCmdLine;
use TestDriverHive;
use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
use Util;
Modified: hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_cmdline.conf
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_cmdline.conf?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_cmdline.conf (original)
+++ hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_cmdline.conf Fri Jun 7 12:41:20 2013
@@ -1,25 +1,24 @@
#!/usr/bin/env perl
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+################################################################################
-###############################################################################
-# Nightly tests for hive.
-#
-#
$cfg = {
'driver' => 'HiveCmdLine',
Modified: hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_nightly.conf
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_nightly.conf?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_nightly.conf (original)
+++ hive/branches/HIVE-4115/hcatalog/src/test/e2e/hcatalog/tests/hive_nightly.conf Fri Jun 7 12:41:20 2013
@@ -1,25 +1,23 @@
#!/usr/bin/env perl
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
-###############################################################################
-# Nightly tests for hive.
-#
-#
+################################################################################
$cfg = {
'driver' => 'Hive',
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java Fri Jun 7 12:41:20 2013
@@ -27,6 +27,8 @@ import java.util.List;
import java.util.Map;
import org.antlr.runtime.tree.Tree;
+import org.apache.commons.httpclient.URIException;
+import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -82,7 +84,7 @@ public class LoadSemanticAnalyzer extend
// directory
if (!path.startsWith("/")) {
if (isLocal) {
- path = new Path(System.getProperty("user.dir"), path).toUri().toString();
+ path = URIUtil.decode( new Path(System.getProperty("user.dir"), path).toUri().toString() );
} else {
path = new Path(new Path("/user/" + System.getProperty("user.name")),
path).toString();
@@ -231,8 +233,13 @@ public class LoadSemanticAnalyzer extend
// that's just a test case.
String copyURIStr = ctx.getExternalTmpFileURI(toURI);
URI copyURI = URI.create(copyURIStr);
- rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr),
- conf);
+ try {
+ rTask = TaskFactory.get(new CopyWork(URIUtil.decode(fromURI.toString()), copyURIStr),
+ conf);
+ } catch (URIException e) {
+ throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
+ .getMessage()), e);
+ }
fromURI = copyURI;
}
@@ -261,8 +268,14 @@ public class LoadSemanticAnalyzer extend
}
- LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(),
- loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);
+ LoadTableDesc loadTableWork;
+ try {
+ loadTableWork = new LoadTableDesc(URIUtil.decode(fromURI.toString()),
+ loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);
+ } catch (URIException e1) {
+ throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e1
+ .getMessage()), e1);
+ }
Task<? extends Serializable> childTask = TaskFactory.get(new MoveWork(getInputs(),
getOutputs(), loadTableWork, null, true), conf);
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java Fri Jun 7 12:41:20 2013
@@ -28,7 +28,13 @@ public class NumDistinctValueEstimator {
static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName());
- private final int bitVectorSize = 32;
+ /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
+ * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
+ * If a,b,x didn't come from a finite field, ax1 + b mod k and ax2 + b mod k will not be pairwise
+ * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
+ * thus introducing errors in the estimates.
+ */
+ private static final int bitVectorSize = 31;
private int numBitVectors;
// Refer to Flajolet-Martin'86 for the value of phi
@@ -53,8 +59,23 @@ public class NumDistinctValueEstimator {
a = new int[numBitVectors];
b = new int[numBitVectors];
- aValue = new Random(79798);
- bValue = new Random(34115);
+ /* Use a large prime number as a seed to the random number generator.
+ * Java's random number generator uses the Linear Congruential Generator to generate random
+ * numbers using the following recurrence relation,
+ *
+ * X(n+1) = (a X(n) + c ) mod m
+ *
+ * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48
+ * is not a prime number and hence the set of numbers from 0 to m don't form a finite field.
+ * If these numbers don't come from a finite field, any given X(n) and X(n+1) may not be
+ * pairwise independent.
+ *
+ * However, empirically passing in prime numbers as seeds seems to work better than when passing
+ * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime.
+ *
+ */
+ aValue = new Random(99397);
+ bValue = new Random(9876413);
for (int i = 0; i < numBitVectors; i++) {
int randVal;
@@ -76,11 +97,11 @@ public class NumDistinctValueEstimator {
b[i] = randVal;
if (a[i] < 0) {
- a[i] = a[i] + (1 << (bitVectorSize -1));
+ a[i] = a[i] + (1 << bitVectorSize - 1);
}
if (b[i] < 0) {
- b[i] = b[i] + (1 << (bitVectorSize -1));
+ b[i] = b[i] + (1 << bitVectorSize - 1);
}
}
}
@@ -197,8 +218,8 @@ public class NumDistinctValueEstimator {
}
private int generateHash(long v, int hashNum) {
- int mod = 1 << (bitVectorSize - 1) - 1;
- long tempHash = a[hashNum] * v + b[hashNum];
+ int mod = (1<<bitVectorSize) - 1;
+ long tempHash = a[hashNum] * v + b[hashNum];
tempHash %= mod;
int hash = (int) tempHash;
@@ -206,7 +227,7 @@ public class NumDistinctValueEstimator {
* Hence hash value has to be non-negative.
*/
if (hash < 0) {
- hash = hash + mod + 1;
+ hash = hash + mod;
}
return hash;
}
@@ -266,6 +287,7 @@ public class NumDistinctValueEstimator {
bitVector[hash%numBitVectors].set(index);
}
+
public void mergeEstimators(NumDistinctValueEstimator o) {
// Bitwise OR the bitvector with the bitvector in the agg buffer
for (int i=0; i<numBitVectors; i++) {
@@ -289,36 +311,22 @@ public class NumDistinctValueEstimator {
return ((long)numDistinctValues);
}
- /* We use two estimators - one due to Flajolet-Martin and a modification due to
- * Alon-Matias-Szegedy. FM uses the location of the least significant zero as an estimate of
- * log2(phi*ndvs).
- * AMS uses the location of the most significant one as an estimate of the log2(ndvs).
- * We average the two estimators with suitable modifications to obtain an estimate of ndvs.
+ /* We use the Flajolet-Martin estimator to estimate the number of distinct values. FM uses the
+ * location of the least significant zero as an estimate of log2(phi*ndvs).
*/
public long estimateNumDistinctValues() {
int sumLeastSigZero = 0;
- int sumMostSigOne = 0;
double avgLeastSigZero;
- double avgMostSigOne;
double numDistinctValues;
for (int i=0; i< numBitVectors; i++) {
int leastSigZero = bitVector[i].nextClearBit(0);
sumLeastSigZero += leastSigZero;
- int mostSigOne = bitVectorSize;
-
- for (int j=0; j< bitVectorSize; j++) {
- if (bitVector[i].get(j)) {
- mostSigOne = j;
- }
- }
- sumMostSigOne += mostSigOne;
}
avgLeastSigZero =
(double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(phi)/Math.log(2.0));
- avgMostSigOne = (double)(sumMostSigOne/(numBitVectors * 1.0));
- numDistinctValues = Math.pow(2.0, (avgMostSigOne + avgLeastSigZero)/2.0);
+ numDistinctValues = Math.pow(2.0, avgLeastSigZero);
return ((long)(numDistinctValues));
}
}
Modified: hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/combine2_win.q
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/combine2_win.q?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/combine2_win.q (original)
+++ hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/combine2_win.q Fri Jun 7 12:41:20 2013
@@ -11,6 +11,8 @@ set hive.merge.smallfiles.avgsize=0;
-- INCLUDE_OS_WINDOWS
-- included only on windows because of difference in file name encoding logic
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+
create table combine2(key string) partitioned by (value string);
insert overwrite table combine2 partition(value)
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/combine2_win.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/combine2_win.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/combine2_win.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/combine2_win.q.out Fri Jun 7 12:41:20 2013
@@ -1,8 +1,14 @@
PREHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
create table combine2(key string) partitioned by (value string)
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
create table combine2(key string) partitioned by (value string)
POSTHOOK: type: CREATETABLE
@@ -124,6 +130,7 @@ STAGE PLANS:
PREHOOK: query: select key, value from combine2 where value is not null order by key
PREHOOK: type: QUERY
+PREHOOK: Input: default@combine2
PREHOOK: Input: default@combine2@value=%7C
PREHOOK: Input: default@combine2@value=2010-04-21%2009%3A45%3A00
PREHOOK: Input: default@combine2@value=val_0
@@ -135,6 +142,7 @@ PREHOOK: Input: default@combine2@value=v
#### A masked pattern was here ####
POSTHOOK: query: select key, value from combine2 where value is not null order by key
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@combine2
POSTHOOK: Input: default@combine2@value=%7C
POSTHOOK: Input: default@combine2@value=2010-04-21%2009%3A45%3A00
POSTHOOK: Input: default@combine2@value=val_0
@@ -224,7 +232,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 2
@@ -270,7 +277,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 2
@@ -316,7 +322,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 3
partition_columns value
rawDataSize 3
@@ -362,7 +367,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 1
@@ -408,7 +412,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 1
@@ -454,7 +457,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 3
partition_columns value
rawDataSize 3
@@ -500,7 +502,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 1
@@ -546,7 +547,6 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.combine2
numFiles 1
- numPartitions 8
numRows 1
partition_columns value
rawDataSize 1
@@ -607,6 +607,15 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Truncated Path -> Alias:
+ /combine2/value=%7C [combine2]
+ /combine2/value=2010-04-21%2009%3A45%3A00 [combine2]
+ /combine2/value=val_0 [combine2]
+ /combine2/value=val_2 [combine2]
+ /combine2/value=val_4 [combine2]
+ /combine2/value=val_5 [combine2]
+ /combine2/value=val_8 [combine2]
+ /combine2/value=val_9 [combine2]
Stage: Stage-0
Fetch Operator
@@ -615,6 +624,7 @@ STAGE PLANS:
PREHOOK: query: select count(1) from combine2 where value is not null
PREHOOK: type: QUERY
+PREHOOK: Input: default@combine2
PREHOOK: Input: default@combine2@value=%7C
PREHOOK: Input: default@combine2@value=2010-04-21%2009%3A45%3A00
PREHOOK: Input: default@combine2@value=val_0
@@ -626,6 +636,7 @@ PREHOOK: Input: default@combine2@value=v
#### A masked pattern was here ####
POSTHOOK: query: select count(1) from combine2 where value is not null
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@combine2
POSTHOOK: Input: default@combine2@value=%7C
POSTHOOK: Input: default@combine2@value=2010-04-21%2009%3A45%3A00
POSTHOOK: Input: default@combine2@value=val_0
@@ -729,6 +740,7 @@ STAGE PLANS:
PREHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds
PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
@@ -736,6 +748,7 @@ PREHOOK: Input: default@srcpart@ds=2008-
#### A masked pattern was here ####
POSTHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_double.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_double.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_double.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_double.q.out Fri Jun 7 12:41:20 2013
@@ -30,4 +30,4 @@ select compute_stats(a, 16) from tab_dou
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_double
#### A masked pattern was here ####
-{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":8}
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11}
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_long.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_long.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_long.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_long.q.out Fri Jun 7 12:41:20 2013
@@ -30,4 +30,4 @@ select compute_stats(a, 16) from tab_int
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_int
#### A masked pattern was here ####
-{"columntype":"Long","min":0,"max":344,"countnulls":1,"numdistinctvalues":16}
+{"columntype":"Long","min":0,"max":344,"countnulls":1,"numdistinctvalues":11}
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_string.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_string.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_string.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/compute_stats_string.q.out Fri Jun 7 12:41:20 2013
@@ -30,4 +30,4 @@ select compute_stats(a, 16) from tab_str
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_string
#### A masked pattern was here ####
-{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":5}
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7}
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/input_part10_win.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/input_part10_win.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/input_part10_win.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/input_part10_win.q.out Fri Jun 7 12:41:20 2013
@@ -1,4 +1,5 @@
PREHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
CREATE TABLE part_special (
a STRING,
@@ -9,6 +10,7 @@ CREATE TABLE part_special (
)
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
CREATE TABLE part_special (
a STRING,
@@ -105,18 +107,26 @@ POSTHOOK: query: DESCRIBE EXTENDED part_
POSTHOOK: type: DESCTABLE
POSTHOOK: Lineage: part_special PARTITION(ds=2008 04 08,ts=10:11:12=455).a SIMPLE []
POSTHOOK: Lineage: part_special PARTITION(ds=2008 04 08,ts=10:11:12=455).b SIMPLE []
-a string
-b string
-ds string
-ts string
+a string None
+b string None
+ds string None
+ts string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+ts string None
#### A masked pattern was here ####
PREHOOK: query: SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455'
PREHOOK: type: QUERY
+PREHOOK: Input: default@part_special
PREHOOK: Input: default@part_special@ds=2008%2004%2008/ts=10%3A11%3A12%3D455
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455'
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_special
POSTHOOK: Input: default@part_special@ds=2008%2004%2008/ts=10%3A11%3A12%3D455
#### A masked pattern was here ####
POSTHOOK: Lineage: part_special PARTITION(ds=2008 04 08,ts=10:11:12=455).a SIMPLE []
Modified: hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_dyn_part14_win.q.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_dyn_part14_win.q.out?rev=1490614&r1=1490613&r2=1490614&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_dyn_part14_win.q.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/results/clientpositive/load_dyn_part14_win.q.out Fri Jun 7 12:41:20 2013
@@ -1,9 +1,13 @@
PREHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
+
create table if not exists nzhang_part14 (key string)
partitioned by (value string)
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- INCLUDE_OS_WINDOWS
+-- included only on windows because of difference in file name encoding logic
+
create table if not exists nzhang_part14 (key string)
partitioned by (value string)
@@ -13,8 +17,13 @@ PREHOOK: query: describe extended nzhang
PREHOOK: type: DESCTABLE
POSTHOOK: query: describe extended nzhang_part14
POSTHOOK: type: DESCTABLE
-key string
-value string
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+value string None
#### A masked pattern was here ####
PREHOOK: query: explain
@@ -42,14 +51,16 @@ ABSTRACT SYNTAX TREE:
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-7, Stage-8
- Stage-6 depends on stages: Stage-2 , consists of Stage-5, Stage-4
+ Stage-2 depends on stages: Stage-1, Stage-9, Stage-10
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
Stage-3 depends on stages: Stage-0
Stage-4
- Stage-7 is a root stage
- Stage-8 is a root stage
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-9 is a root stage
+ Stage-10 is a root stage
STAGE PLANS:
Stage: Stage-1
@@ -142,7 +153,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part14
- Stage: Stage-6
+ Stage: Stage-8
Conditional Operator
Stage: Stage-5
@@ -179,7 +190,26 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part14
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.nzhang_part14
+
Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-9
Map Reduce
Alias -> Map Operator Tree:
null-subquery2:t-subquery2:src
@@ -211,7 +241,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-8
+ Stage: Stage-10
Map Reduce
Alias -> Map Operator Tree:
null-subquery1-subquery1:t-subquery1-subquery1:src
@@ -280,12 +310,14 @@ value=__HIVE_DEFAULT_PARTITION__
PREHOOK: query: select * from nzhang_part14 where value <> 'a'
order by key, value
PREHOOK: type: QUERY
+PREHOOK: Input: default@nzhang_part14
PREHOOK: Input: default@nzhang_part14@value=%20
PREHOOK: Input: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__
#### A masked pattern was here ####
POSTHOOK: query: select * from nzhang_part14 where value <> 'a'
order by key, value
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nzhang_part14
POSTHOOK: Input: default@nzhang_part14@value=%20
POSTHOOK: Input: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__
#### A masked pattern was here ####