You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/02 06:49:20 UTC

svn commit: r1463380 - in /hive/trunk: ql/src/test/queries/clientpositive/serde_regex.q ql/src/test/results/clientpositive/serde_regex.q.out serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java

Author: hashutosh
Date: Tue Apr  2 04:49:19 2013
New Revision: 1463380

URL: http://svn.apache.org/r1463380
Log:
HIVE-3951 : Allow Decimal type columns in Regex Serde (Mark Grover via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q
    hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java

Modified: hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q Tue Apr  2 04:49:19 2013
@@ -39,3 +39,28 @@ SELECT * FROM serde_regex ORDER BY time;
 SELECT host, size, status, time from serde_regex ORDER BY time;
 
 DROP TABLE serde_regex;
+
+EXPLAIN
+CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE;
+
+CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1;
+
+SELECT key, value FROM serde_regex1 ORDER BY key;
+
+DROP TABLE serde_regex1;

Modified: hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out Tue Apr  2 04:49:19 2013
@@ -127,3 +127,124 @@ POSTHOOK: query: DROP TABLE serde_regex
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@serde_regex
 POSTHOOK: Output: default@serde_regex
+PREHOOK: query: EXPLAIN
+CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: EXPLAIN
+CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_DECIMAL) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+      Create Table Operator:
+        Create Table
+          columns: key decimal, value int
+          if not exists: false
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+          serde properties:
+            input.regex ([^ ]*) ([^ ]*)
+          name: serde_regex1
+          isExternal: false
+
+
+PREHOOK: query: CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE serde_regex1(
+  key decimal,
+  value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+  "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@serde_regex1
+PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@serde_regex1
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@serde_regex1
+PREHOOK: query: SELECT key, value FROM serde_regex1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@serde_regex1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM serde_regex1 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@serde_regex1
+#### A masked pattern was here ####
+-1234567890.123456789	-1234567890
+-4.4E+3	4400
+-1255.49	-1255
+-1.122	-11
+-1.12	-1
+-1.12	-1
+-0.333	0
+-0.33	0
+-0.3	0
+0	0
+0	0
+1E-99	0
+0.01	0
+0.02	0
+0.1	0
+0.2	0
+0.3	0
+0.33	0
+0.333	0
+0.9999999999999999999999999	1
+1	1
+1	1
+1.12	1
+1.122	1
+2	2
+2	2
+3.14	3
+3.14	3
+3.14	3
+3.14	4
+1E+1	10
+2E+1	20
+1E+2	100
+124	124
+125.2	125
+2E+2	200
+1234567890.12345678	1234567890
+1E+99	0
+PREHOOK: query: DROP TABLE serde_regex1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@serde_regex1
+PREHOOK: Output: default@serde_regex1
+POSTHOOK: query: DROP TABLE serde_regex1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@serde_regex1
+POSTHOOK: Output: default@serde_regex1

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java Tue Apr  2 04:49:19 2013
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.serde2;
 
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -49,7 +50,7 @@ import org.apache.hadoop.io.Writable;
  * but has more than expected groups, the additional groups are just ignored.
  *
  * NOTE: Regex SerDe supports primitive column types such as TINYINT, SMALLINT,
- * INT, BIGINT, FLOAT, DOUBLE, STRING and BOOLEAN
+ * INT, BIGINT, FLOAT, DOUBLE, STRING, BOOLEAN and DECIMAL
  *
  *
  * NOTE: This implementation uses javaStringObjectInspector for STRING. A
@@ -133,6 +134,8 @@ public class RegexSerDe extends Abstract
        columnOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
       } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
         columnOIs.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
+      } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
+        columnOIs.add(PrimitiveObjectInspectorFactory.javaBigDecimalObjectInspector);
       } else {
          throw new SerDeException(getClass().getName()
          + " doesn't allow column [" + c + "] named "
@@ -225,7 +228,11 @@ public class RegexSerDe extends Abstract
         } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
           Boolean b;
           b = Boolean.valueOf(t);
-          row.set(c,b);
+          row.set(c, b);
+        } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
+          BigDecimal bd;
+          bd = new BigDecimal(t);
+          row.set(c, bd);
         }
       } catch (RuntimeException e) {
          partialMatchedRowsCount++;