You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/10/05 22:26:45 UTC

svn commit: r821992 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/udf/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: zshao
Date: Mon Oct  5 20:26:44 2009
New Revision: 821992

URL: http://svn.apache.org/viewvc?rev=821992&view=rev
Log:
HIVE-236. RLIKE/REGEXP allowing matching of partial strings. (Paul Yang via zshao)

Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Oct  5 20:26:44 2009
@@ -4,6 +4,9 @@
 
   INCOMPATIBLE CHANGES
 
+    HIVE-236. RLIKE/REGEXP allowing matching of partial strings.
+    (Paul Yang via zshao)
+
   NEW FEATURES
 
     HIVE-743. Let user specify serde for custom scripts.

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java Mon Oct  5 20:26:44 2009
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.description;
 import org.apache.hadoop.io.BooleanWritable;
@@ -36,8 +38,11 @@
     )
 public class UDFRegExp extends UDF {
 
+  static final Log LOG = LogFactory.getLog(UDFRegExp.class.getName());
+  
   private Text lastRegex = new Text();
   private Pattern p = null;
+  boolean warned = false;
 
   BooleanWritable result = new BooleanWritable();
   public UDFRegExp() {
@@ -47,12 +52,21 @@
     if (s == null || regex == null) {
       return null;
     }
+    if(regex.getLength()==0) {
+      if(!warned) {
+        warned = true;
+        LOG.warn(getClass().getSimpleName() + " regex is empty. Additional " +
+            "warnings for an empty regex will be suppressed.");
+      }
+      result.set(false);
+      return result;
+    }
     if (!regex.equals(lastRegex) || p == null) {
       lastRegex.set(regex);
       p = Pattern.compile(regex.toString());
     }
     Matcher m = p.matcher(s.toString());
-    result.set(m.matches());
+    result.set(m.find(0));
     return result;
   }
 

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q?rev=821992&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_regexp.q Mon Oct  5 20:26:44 2009
@@ -0,0 +1,7 @@
+DESCRIBE FUNCTION regexp;
+
+DESCRIBE FUNCTION EXTENDED regexp;
+
+SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out?rev=821992&r1=821991&r2=821992&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out Mon Oct  5 20:26:44 2009
@@ -110,10 +110,10 @@
           Move Operator
             files:
                 hdfs directory: true
-                destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/509385687/10000
+                destination: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/594380230/10000
           Map Reduce
             Alias -> Map Operator Tree:
-              file:/data/users/njain/hive5/hive5/build/ql/tmp/1277759891/10002 
+              file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/501326158/10002 
                   Reduce Output Operator
                     sort order: 
                     Map-reduce partition columns:
@@ -206,9 +206,9 @@
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000
+PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000
-true	false	true	true	true	false	false	false	true	true	false	true	false	acc	abc	abb	hive	hadoop	AaAbAcA	false
+POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000
+true	false	true	true	true	false	false	false	true	true	false	true	true	acc	abc	abb	hive	hadoop	AaAbAcA	false

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out?rev=821992&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_regexp.q.out Mon Oct  5 20:26:44 2009
@@ -0,0 +1,26 @@
+PREHOOK: query: DESCRIBE FUNCTION regexp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION regexp
+POSTHOOK: type: DESCFUNCTION
+str regexp regexp - Returns true if str matches regexp and false otherwise
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED regexp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED regexp
+POSTHOOK: type: DESCFUNCTION
+str regexp regexp - Returns true if str matches regexp and false otherwise
+Example:
+  > SELECT 'fb' regexp '.*' FROM src LIMIT 1;
+  true
+PREHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000
+POSTHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo',
+'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$'
+FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000
+true	true	true	true	false	false	false	false