You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/02 06:49:20 UTC
svn commit: r1463380 - in /hive/trunk:
ql/src/test/queries/clientpositive/serde_regex.q
ql/src/test/results/clientpositive/serde_regex.q.out
serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
Author: hashutosh
Date: Tue Apr 2 04:49:19 2013
New Revision: 1463380
URL: http://svn.apache.org/r1463380
Log:
HIVE-3951 : Allow Decimal type columns in Regex Serde (Mark Grover via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q
hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
Modified: hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/serde_regex.q Tue Apr 2 04:49:19 2013
@@ -39,3 +39,28 @@ SELECT * FROM serde_regex ORDER BY time;
SELECT host, size, status, time from serde_regex ORDER BY time;
DROP TABLE serde_regex;
+
+EXPLAIN
+CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE;
+
+CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1;
+
+SELECT key, value FROM serde_regex1 ORDER BY key;
+
+DROP TABLE serde_regex1;
Modified: hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/serde_regex.q.out Tue Apr 2 04:49:19 2013
@@ -127,3 +127,124 @@ POSTHOOK: query: DROP TABLE serde_regex
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@serde_regex
POSTHOOK: Output: default@serde_regex
+PREHOOK: query: EXPLAIN
+CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: EXPLAIN
+CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+ABSTRACT SYNTAX TREE:
+ (TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_DECIMAL) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Create Table Operator:
+ Create Table
+ columns: key decimal, value int
+ if not exists: false
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ # buckets: -1
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+ serde properties:
+ input.regex ([^ ]*) ([^ ]*)
+ name: serde_regex1
+ isExternal: false
+
+
+PREHOOK: query: CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE serde_regex1(
+ key decimal,
+ value int)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+ "input.regex" = "([^ ]*) ([^ ]*)"
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@serde_regex1
+PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@serde_regex1
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/kv7.txt" INTO TABLE serde_regex1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@serde_regex1
+PREHOOK: query: SELECT key, value FROM serde_regex1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@serde_regex1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM serde_regex1 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@serde_regex1
+#### A masked pattern was here ####
+-1234567890.123456789 -1234567890
+-4.4E+3 4400
+-1255.49 -1255
+-1.122 -11
+-1.12 -1
+-1.12 -1
+-0.333 0
+-0.33 0
+-0.3 0
+0 0
+0 0
+1E-99 0
+0.01 0
+0.02 0
+0.1 0
+0.2 0
+0.3 0
+0.33 0
+0.333 0
+0.9999999999999999999999999 1
+1 1
+1 1
+1.12 1
+1.122 1
+2 2
+2 2
+3.14 3
+3.14 3
+3.14 3
+3.14 4
+1E+1 10
+2E+1 20
+1E+2 100
+124 124
+125.2 125
+2E+2 200
+1234567890.12345678 1234567890
+1E+99 0
+PREHOOK: query: DROP TABLE serde_regex1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@serde_regex1
+PREHOOK: Output: default@serde_regex1
+POSTHOOK: query: DROP TABLE serde_regex1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@serde_regex1
+POSTHOOK: Output: default@serde_regex1
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java?rev=1463380&r1=1463379&r2=1463380&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java Tue Apr 2 04:49:19 2013
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -49,7 +50,7 @@ import org.apache.hadoop.io.Writable;
* but has more than expected groups, the additional groups are just ignored.
*
* NOTE: Regex SerDe supports primitive column types such as TINYINT, SMALLINT,
- * INT, BIGINT, FLOAT, DOUBLE, STRING and BOOLEAN
+ * INT, BIGINT, FLOAT, DOUBLE, STRING, BOOLEAN and DECIMAL
*
*
* NOTE: This implementation uses javaStringObjectInspector for STRING. A
@@ -133,6 +134,8 @@ public class RegexSerDe extends Abstract
columnOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
} else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
columnOIs.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
+ } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
+ columnOIs.add(PrimitiveObjectInspectorFactory.javaBigDecimalObjectInspector);
} else {
throw new SerDeException(getClass().getName()
+ " doesn't allow column [" + c + "] named "
@@ -225,7 +228,11 @@ public class RegexSerDe extends Abstract
} else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
Boolean b;
b = Boolean.valueOf(t);
- row.set(c,b);
+ row.set(c, b);
+ } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
+ BigDecimal bd;
+ bd = new BigDecimal(t);
+ row.set(c, bd);
}
} catch (RuntimeException e) {
partialMatchedRowsCount++;