You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/08/16 06:25:45 UTC
svn commit: r1158104 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: nzhang
Date: Tue Aug 16 04:25:44 2011
New Revision: 1158104
URL: http://svn.apache.org/viewvc?rev=1158104&view=rev
Log:
HIVE-1916. Change Default Alias For Aggregated Columns (_c1) (sameerm via nzhang)
Added:
hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q
hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Aug 16 04:25:44 2011
@@ -461,6 +461,11 @@ public class HiveConf extends Configurat
HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false),
HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true),
+ //prefix used for auto-generated column aliases
+ HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c"),
+ HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME(
+ "hive.autogen.columnalias.prefix.includefuncname", false),
+
// The class responsible for logging client side performance metrics
// Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"),
Modified: hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml (original)
+++ hive/trunk/conf/hive-default.xml Tue Aug 16 04:25:44 2011
@@ -1145,6 +1145,19 @@
</property>
<property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto-generating column aliases.
+ By default, the column position number is appended to the prefix label to form the column alias. Auto-generation happens when an expression (for example, an aggregate function) is used in a select clause without an explicit alias.</description>
+</property>
+
+<property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include the function name in the column alias auto-generated by Hive.</description>
+</property>
+
+<property>
<name>hive.exec.perf.logger</name>
<value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
<description>The class responsible logging client side performance metrics. Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger</description>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Aug 16 04:25:44 2011
@@ -200,6 +200,13 @@ public class SemanticAnalyzer extends Ba
private final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx();
+ //prefix for column names auto generated by hive
+ private final String autogenColAliasPrfxLbl;
+ private final boolean autogenColAliasPrfxIncludeFuncName;
+
+ //Max characters when auto generating the column name with func name
+ private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+
public static class GlobalLimitCtx {
private boolean enable = false;
private int globalLimit = -1;
@@ -268,6 +275,10 @@ public class SemanticAnalyzer extends Ba
groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
prunedPartitions = new HashMap<String, PrunedPartitionList>();
unparseTranslator = new UnparseTranslator();
+ autogenColAliasPrfxLbl = HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
+ autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
}
@Override
@@ -1936,7 +1947,7 @@ public class SemanticAnalyzer extends Ba
}
private static String[] getColAlias(ASTNode selExpr, String defaultName,
- RowResolver inputRR) {
+ RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
@@ -1973,9 +1984,29 @@ public class SemanticAnalyzer extends Ba
}
}
+ //if specified generate alias using func name
+ if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){
+
+ String expr_flattened = root.toStringTree();
+
+ //remove all TOK tokens
+ String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", "");
+
+ //replace every non-word character (anything other than letters, digits and underscore) with a space, then collapse each whitespace run into a single underscore
+ String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+
+ //limit length to 20 chars
+ if(expr_formatted.length()>AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
+ expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
+ }
+
+ //append colnum to make it unique
+ colAlias = expr_formatted.concat("_" + colNum);
+ }
+
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
- colAlias = defaultName;
+ colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
@@ -2151,18 +2182,20 @@ public class SemanticAnalyzer extends Ba
if (isInTransform || isUDTF) {
tabAlias = null;
- colAlias = "_C" + i;
+ colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
- String[] colRef = getColAlias(child, "_C" + i, inputRR);
+ // Get rid of TOK_SELEXPR
+ expr = (ASTNode) child.getChild(0);
+ String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
+ autogenColAliasPrfxIncludeFuncName, i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
- // Get rid of TOK_SELEXPR
- expr = (ASTNode) child.getChild(0);
+
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
Added: hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q?rev=1158104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q Tue Aug 16 04:25:44 2011
@@ -0,0 +1,22 @@
+CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax';
+
+create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key;
+describe dest_grouped_old1;
+
+create table dest_grouped_old2 as select distinct src.key from src;
+describe dest_grouped_old2;
+
+set hive.autogen.columnalias.prefix.label=column_;
+set hive.autogen.columnalias.prefix.includefuncname=true;
+
+create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key;
+describe dest_grouped_new1;
+
+create table dest_grouped_new2 as select distinct src.key from src;
+describe dest_grouped_new2;
Added: hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out?rev=1158104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out Tue Aug 16 04:25:44 2011
@@ -0,0 +1,82 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+PREHOOK: type: CREATEFUNCTION
+POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+POSTHOOK: type: CREATEFUNCTION
+PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old1
+PREHOOK: query: describe dest_grouped_old1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old1
+POSTHOOK: type: DESCTABLE
+_c0 int
+zz int
+key string
+_c3 int
+_c4 bigint
+_c5 double
+_c6 bigint
+_c7 bigint
+_c8 int
+_c9 int
+PREHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old2
+PREHOOK: query: describe dest_grouped_old2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old2
+POSTHOOK: type: DESCTABLE
+key string
+PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new1
+PREHOOK: query: describe dest_grouped_new1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new1
+POSTHOOK: type: DESCTABLE
+column_0 int
+zz int
+column_2 double
+test_max_length_src__3 int
+count_src_value_4 bigint
+sin_count_src_value_5 double
+count_sin_src_value_6 bigint
+unix_timestamp_7 bigint
+sum_if_value_10_valu_8 int
+if_src_key_10_src_ke_9 double
+PREHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new2
+PREHOOK: query: describe dest_grouped_new2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new2
+POSTHOOK: type: DESCTABLE
+key string