You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/08/16 06:25:45 UTC

svn commit: r1158104 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: nzhang
Date: Tue Aug 16 04:25:44 2011
New Revision: 1158104

URL: http://svn.apache.org/viewvc?rev=1158104&view=rev
Log:
HIVE-1916. Change Default Alias For Aggregated Columns (_c1) (sameerm via nzhang)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q
    hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Aug 16 04:25:44 2011
@@ -461,6 +461,11 @@ public class HiveConf extends Configurat
     HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false),
     HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true),
 
+    //prefix used for auto-generated column aliases
+    HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c"),
+    HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME(
+                               "hive.autogen.columnalias.prefix.includefuncname", false),
+
     // The class responsible for logging client side performance metrics
     // Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
     HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"),

Modified: hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml (original)
+++ hive/trunk/conf/hive-default.xml Tue Aug 16 04:25:44 2011
@@ -1145,6 +1145,19 @@
 </property>
 
 <property>
+  <name>hive.autogen.columnalias.prefix.label</name>
+  <value>_c</value>
+  <description>String used as a prefix when auto-generating column aliases.
+  By default the column position number is appended to the prefix label to form the column alias. Auto-generation happens if an aggregate function is used in a select clause without an explicit alias.</description>
+</property>
+
+<property>
+  <name>hive.autogen.columnalias.prefix.includefuncname</name>
+  <value>false</value>
+  <description>Whether to include the function name in the column alias auto-generated by Hive.</description>
+</property>
+
+<property>
   <name>hive.exec.perf.logger</name>
   <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
   <description>The class responsible logging client side performance metrics.  Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger</description>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1158104&r1=1158103&r2=1158104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Aug 16 04:25:44 2011
@@ -200,6 +200,13 @@ public class SemanticAnalyzer extends Ba
   private final UnparseTranslator unparseTranslator;
   private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx();
 
+  //prefix for column names auto generated by hive
+  private final String autogenColAliasPrfxLbl;
+  private final boolean autogenColAliasPrfxIncludeFuncName;
+
+  //Max characters when auto generating the column name with func name
+  private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+
   public static class GlobalLimitCtx {
     private boolean enable = false;
     private int globalLimit = -1;
@@ -268,6 +275,10 @@ public class SemanticAnalyzer extends Ba
     groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
     prunedPartitions = new HashMap<String, PrunedPartitionList>();
     unparseTranslator = new UnparseTranslator();
+    autogenColAliasPrfxLbl = HiveConf.getVar(conf,
+                                HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
+    autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
+                         HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
   }
 
   @Override
@@ -1936,7 +1947,7 @@ public class SemanticAnalyzer extends Ba
   }
 
   private static String[] getColAlias(ASTNode selExpr, String defaultName,
-      RowResolver inputRR) {
+      RowResolver inputRR, boolean includeFuncName, int colNum) {
     String colAlias = null;
     String tabAlias = null;
     String[] colRef = new String[2];
@@ -1973,9 +1984,29 @@ public class SemanticAnalyzer extends Ba
       }
     }
 
+    //if specified generate alias using func name
+    if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){
+
+      String expr_flattened = root.toStringTree();
+
+      //remove all TOK tokens
+      String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", "");
+
+      //replace every non-word character with a space, trim, then collapse each whitespace run into a single underscore
+      String  expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+
+      //limit length to 20 chars
+      if(expr_formatted.length()>AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
+        expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
+      }
+
+      //append colnum to make it unique
+      colAlias = expr_formatted.concat("_" + colNum);
+    }
+
     if (colAlias == null) {
       // Return defaultName if selExpr is not a simple xx.yy.zz
-      colAlias = defaultName;
+      colAlias = defaultName + colNum;
     }
 
     colRef[0] = tabAlias;
@@ -2151,18 +2182,20 @@ public class SemanticAnalyzer extends Ba
 
       if (isInTransform || isUDTF) {
         tabAlias = null;
-        colAlias = "_C" + i;
+        colAlias = autogenColAliasPrfxLbl + i;
         expr = child;
       } else {
-        String[] colRef = getColAlias(child, "_C" + i, inputRR);
+        // Get rid of TOK_SELEXPR
+        expr = (ASTNode) child.getChild(0);
+        String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
+                                             autogenColAliasPrfxIncludeFuncName, i);
         tabAlias = colRef[0];
         colAlias = colRef[1];
         if (hasAsClause) {
           unparseTranslator.addIdentifierTranslation((ASTNode) child
               .getChild(1));
         }
-        // Get rid of TOK_SELEXPR
-        expr = (ASTNode) child.getChild(0);
+
       }
 
       if (expr.getType() == HiveParser.TOK_ALLCOLREF) {

Added: hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q?rev=1158104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/autogen_colalias.q Tue Aug 16 04:25:44 2011
@@ -0,0 +1,22 @@
+CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax';
+
+create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key;
+describe dest_grouped_old1;
+
+create table dest_grouped_old2 as select distinct src.key from src;
+describe dest_grouped_old2;
+
+set hive.autogen.columnalias.prefix.label=column_;
+set hive.autogen.columnalias.prefix.includefuncname=true;
+
+create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+	(src.key +5) % 2,
+0)
+from src group by src.key;
+describe dest_grouped_new1;
+
+create table dest_grouped_new2 as select distinct src.key from src;
+describe dest_grouped_new2;

Added: hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out?rev=1158104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/autogen_colalias.q.out Tue Aug 16 04:25:44 2011
@@ -0,0 +1,82 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+PREHOOK: type: CREATEFUNCTION
+POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+POSTHOOK: type: CREATEFUNCTION
+PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old1
+PREHOOK: query: describe dest_grouped_old1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old1
+POSTHOOK: type: DESCTABLE
+_c0	int	
+zz	int	
+key	string	
+_c3	int	
+_c4	bigint	
+_c5	double	
+_c6	bigint	
+_c7	bigint	
+_c8	int	
+_c9	int	
+PREHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old2
+PREHOOK: query: describe dest_grouped_old2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old2
+POSTHOOK: type: DESCTABLE
+key	string	
+PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+	(src.key +5) % 2,
+0)
+from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+	(src.key +5) % 2,
+0)
+from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new1
+PREHOOK: query: describe dest_grouped_new1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new1
+POSTHOOK: type: DESCTABLE
+column_0	int	
+zz	int	
+column_2	double	
+test_max_length_src__3	int	
+count_src_value_4	bigint	
+sin_count_src_value_5	double	
+count_sin_src_value_6	bigint	
+unix_timestamp_7	bigint	
+sum_if_value_10_valu_8	int	
+if_src_key_10_src_ke_9	double	
+PREHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new2
+PREHOOK: query: describe dest_grouped_new2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new2
+POSTHOOK: type: DESCTABLE
+key	string