You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bi...@apache.org on 2011/12/09 17:59:12 UTC

svn commit: r1212517 - in /incubator/accumulo/branches/1.4/src/core/src: main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java

Author: billie
Date: Fri Dec  9 16:59:12 2011
New Revision: 1212517

URL: http://svn.apache.org/viewvc?rev=1212517&view=rev
Log:
ACCUMULO-209 applied patch

Modified:
    incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
    incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java

Modified: incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java?rev=1212517&r1=1212516&r2=1212517&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java Fri Dec  9 16:59:12 2011
@@ -17,6 +17,7 @@
 package org.apache.accumulo.core.iterators.user;
 
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -28,7 +29,6 @@ import org.apache.accumulo.core.data.Val
 import org.apache.accumulo.core.iterators.Filter;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.util.ByteArrayBackedCharSequence;
 
 /**
  * A Filter that matches entries based on Java regular expressions.
@@ -52,6 +52,9 @@ public class RegExFilter extends Filter 
   public static final String COLQ_REGEX = "colqRegex";
   public static final String VALUE_REGEX = "valueRegex";
   public static final String OR_FIELDS = "orFields";
+  public static final String ENCODING = "encoding";
+  
+  public static final String ENCODING_DEFAULT = "UTF-8";
   
   private Matcher rowMatcher;
   private Matcher colfMatcher;
@@ -59,33 +62,36 @@ public class RegExFilter extends Filter 
   private Matcher valueMatcher;
   private boolean orFields = false;
   
-  private ByteArrayBackedCharSequence babcs = new ByteArrayBackedCharSequence();
+  private String encoding = ENCODING_DEFAULT;
   
-  private Matcher copyMatcher(Matcher m)
-  {
-	  if(m == null)
-		  return m;
-	  else
-		  return m.pattern().matcher("");
+  private Matcher copyMatcher(Matcher m) {
+    if (m == null)
+      return m;
+    else
+      return m.pattern().matcher("");
   }
   
   private boolean matches(Matcher matcher, ByteSequence bs) {
     if (matcher != null) {
-      babcs.set(bs);
-      matcher.reset(babcs);
-      return matcher.matches();
+      try {
+        matcher.reset(new String(bs.getBackingArray(), encoding));
+        return matcher.matches();
+      } catch (UnsupportedEncodingException e) {
+        e.printStackTrace();
+      }
     }
-    
     return !orFields;
   }
   
   private boolean matches(Matcher matcher, byte data[], int offset, int len) {
     if (matcher != null) {
-      babcs.set(data, offset, len);
-      matcher.reset(babcs);
-      return matcher.matches();
+      try {
+        matcher.reset(new String(data, offset, len, encoding));
+        return matcher.matches();
+      } catch (UnsupportedEncodingException e) {
+        e.printStackTrace();
+      }
     }
-    
     return !orFields;
   }
   
@@ -130,6 +136,10 @@ public class RegExFilter extends Filter 
     } else {
       orFields = false;
     }
+    
+    if (options.containsKey(ENCODING)) {
+      encoding = options.get(ENCODING);
+    }
   }
   
   @Override
@@ -142,6 +152,7 @@ public class RegExFilter extends Filter 
     io.addNamedOption(RegExFilter.COLQ_REGEX, "regular expression on column qualifier");
     io.addNamedOption(RegExFilter.VALUE_REGEX, "regular expression on value");
     io.addNamedOption(RegExFilter.OR_FIELDS, "use OR instread of AND when multiple regexes given");
+    io.addNamedOption(RegExFilter.ENCODING, "character encoding of byte array value (default is " + ENCODING_DEFAULT + ")");
     return io;
   }
   
@@ -160,6 +171,17 @@ public class RegExFilter extends Filter 
     if (options.containsKey(VALUE_REGEX))
       Pattern.compile(options.get(VALUE_REGEX)).matcher("");
     
+    if (options.containsKey(ENCODING)) {
+      try {
+        this.encoding = options.get(ENCODING);
+        @SuppressWarnings("unused")
+        String test = new String("test".getBytes(), encoding);
+      } catch (UnsupportedEncodingException e) {
+        e.printStackTrace();
+        return false;
+      }
+    }
+    
     return true;
   }
   
@@ -192,4 +214,19 @@ public class RegExFilter extends Filter 
       si.addOption(RegExFilter.OR_FIELDS, "true");
     }
   }
+  
+  /**
+   * Set the encoding string to use when interpreting characters
+   * 
+   * @param si
+   *          ScanIterator config to be updated
+   * @param encoding
+   *          the encoding string to use for character interpretation.
+   * 
+   */
+  public static void setEncoding(IteratorSetting si, String encoding) {
+    if (!encoding.isEmpty()) {
+      si.addOption(RegExFilter.ENCODING, encoding);
+    }
+  }
 }

Modified: incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java?rev=1212517&r1=1212516&r2=1212517&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java Fri Dec  9 16:59:12 2011
@@ -199,5 +199,25 @@ public class RegExFilterTest extends Tes
     rei.init(new SortedMapIterator(tm), is.getProperties(), new DefaultIteratorEnvironment());
     rei.seek(new Range(), EMPTY_COL_FAMS, false);
     rei.deepCopy(new DefaultIteratorEnvironment());
+    
+    // -----------------------------------------------------
+    String multiByteText = new String("\u6d67" + "\u6F68" + "\u7067");
+    String multiByteRegex = new String(".*" + "\u6F68" + ".*");
+    
+    Key k4 = new Key("boo4".getBytes(), "hoo".getBytes(), "20080203".getBytes(), "".getBytes(), 1l);
+    Value inVal = new Value(multiByteText.getBytes("UTF-8"));
+    tm.put(k4, inVal);
+    
+    is.clearOptions();
+    
+    RegExFilter.setRegexs(is, null, null, null, multiByteRegex, true);
+    rei.init(new SortedMapIterator(tm), is.getProperties(), new DefaultIteratorEnvironment());
+    rei.seek(new Range(), EMPTY_COL_FAMS, false);
+    
+    assertTrue(rei.hasTop());
+    Value outValue = rei.getTopValue();
+    String outVal = new String(outValue.get(), "UTF-8");
+    assertTrue(outVal.equals(multiByteText));
+    
   }
 }