You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/10/05 22:26:45 UTC
svn commit: r821992 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/udf/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: zshao
Date: Mon Oct 5 20:26:44 2009
New Revision: 821992
URL: http://svn.apache.org/viewvc?rev=821992&view=rev
Log:
HIVE-236. RLIKE/REGEXP allowing matching of partial strings. (Paul Yang via zshao)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java
hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Oct 5 20:26:44 2009
@@ -4,6 +4,9 @@
INCOMPATIBLE CHANGES
+ HIVE-236. RLIKE/REGEXP allowing matching of partial strings.
+ (Paul Yang via zshao)
+
NEW FEATURES
HIVE-743. Let user specify serde for custom scripts.
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java Mon Oct 5 20:26:44 2009
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.udf;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.description;
import org.apache.hadoop.io.BooleanWritable;
@@ -36,8 +38,11 @@
)
public class UDFRegExp extends UDF {
+ static final Log LOG = LogFactory.getLog(UDFRegExp.class.getName());
+
private Text lastRegex = new Text();
private Pattern p = null;
+ boolean warned = false;
BooleanWritable result = new BooleanWritable();
public UDFRegExp() {
@@ -47,12 +52,21 @@
if (s == null || regex == null) {
return null;
}
+ if(regex.getLength()==0) {
+ if(!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " regex is empty. Additional " +
+ "warnings for an empty regex will be suppressed.");
+ }
+ result.set(false);
+ return result;
+ }
if (!regex.equals(lastRegex) || p == null) {
lastRegex.set(regex);
p = Pattern.compile(regex.toString());
}
Matcher m = p.matcher(s.toString());
- result.set(m.matches());
+ result.set(m.find(0));
return result;
}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q?rev=821992&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q Mon Oct 5 20:26:44 2009
@@ -0,0 +1,7 @@
+DESCRIBE FUNCTION regexp;
+
+DESCRIBE FUNCTION EXTENDED regexp;
+
+SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1;
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out Mon Oct 5 20:26:44 2009
@@ -110,10 +110,10 @@
Move Operator
files:
hdfs directory: true
- destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/509385687/10000
+ destination: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/594380230/10000
Map Reduce
Alias -> Map Operator Tree:
- file:/data/users/njain/hive5/hive5/build/ql/tmp/1277759891/10002
+ file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/501326158/10002
Reduce Output Operator
sort order:
Map-reduce partition columns:
@@ -206,9 +206,9 @@
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000
+PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000
POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000
-true false true true true false false false true true false true false acc abc abb hive hadoop AaAbAcA false
+POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000
+true false true true true false false false true true false true true acc abc abb hive hadoop AaAbAcA false
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out?rev=821992&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out Mon Oct 5 20:26:44 2009
@@ -0,0 +1,26 @@
+PREHOOK: query: DESCRIBE FUNCTION regexp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION regexp
+POSTHOOK: type: DESCFUNCTION
+str regexp regexp - Returns true if str matches regexp and false otherwise
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED regexp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED regexp
+POSTHOOK: type: DESCFUNCTION
+str regexp regexp - Returns true if str matches regexp and false otherwise
+Example:
+ > SELECT 'fb' regexp '.*' FROM src LIMIT 1;
+ true
+PREHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000
+POSTHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000
+true true true true false false false false