You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bi...@apache.org on 2011/12/09 17:59:12 UTC
svn commit: r1212517 - in /incubator/accumulo/branches/1.4/src/core/src:
main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
Author: billie
Date: Fri Dec 9 16:59:12 2011
New Revision: 1212517
URL: http://svn.apache.org/viewvc?rev=1212517&view=rev
Log:
ACCUMULO-209 applied patch
Modified:
incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
Modified: incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java?rev=1212517&r1=1212516&r2=1212517&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java Fri Dec 9 16:59:12 2011
@@ -17,6 +17,7 @@
package org.apache.accumulo.core.iterators.user;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -28,7 +29,6 @@ import org.apache.accumulo.core.data.Val
import org.apache.accumulo.core.iterators.Filter;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.util.ByteArrayBackedCharSequence;
/**
* A Filter that matches entries based on Java regular expressions.
@@ -52,6 +52,9 @@ public class RegExFilter extends Filter
public static final String COLQ_REGEX = "colqRegex";
public static final String VALUE_REGEX = "valueRegex";
public static final String OR_FIELDS = "orFields";
+ public static final String ENCODING = "encoding";
+
+ public static final String ENCODING_DEFAULT = "UTF-8";
private Matcher rowMatcher;
private Matcher colfMatcher;
@@ -59,33 +62,36 @@ public class RegExFilter extends Filter
private Matcher valueMatcher;
private boolean orFields = false;
- private ByteArrayBackedCharSequence babcs = new ByteArrayBackedCharSequence();
+ private String encoding = ENCODING_DEFAULT;
- private Matcher copyMatcher(Matcher m)
- {
- if(m == null)
- return m;
- else
- return m.pattern().matcher("");
+ private Matcher copyMatcher(Matcher m) {
+ if (m == null)
+ return m;
+ else
+ return m.pattern().matcher("");
}
private boolean matches(Matcher matcher, ByteSequence bs) {
if (matcher != null) {
- babcs.set(bs);
- matcher.reset(babcs);
- return matcher.matches();
+ try {
+ matcher.reset(new String(bs.getBackingArray(), encoding));
+ return matcher.matches();
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
}
-
return !orFields;
}
private boolean matches(Matcher matcher, byte data[], int offset, int len) {
if (matcher != null) {
- babcs.set(data, offset, len);
- matcher.reset(babcs);
- return matcher.matches();
+ try {
+ matcher.reset(new String(data, offset, len, encoding));
+ return matcher.matches();
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
}
-
return !orFields;
}
@@ -130,6 +136,10 @@ public class RegExFilter extends Filter
} else {
orFields = false;
}
+
+ if (options.containsKey(ENCODING)) {
+ encoding = options.get(ENCODING);
+ }
}
@Override
@@ -142,6 +152,7 @@ public class RegExFilter extends Filter
io.addNamedOption(RegExFilter.COLQ_REGEX, "regular expression on column qualifier");
io.addNamedOption(RegExFilter.VALUE_REGEX, "regular expression on value");
io.addNamedOption(RegExFilter.OR_FIELDS, "use OR instread of AND when multiple regexes given");
+ io.addNamedOption(RegExFilter.ENCODING, "character encoding of byte array value (default is " + ENCODING_DEFAULT + ")");
return io;
}
@@ -160,6 +171,17 @@ public class RegExFilter extends Filter
if (options.containsKey(VALUE_REGEX))
Pattern.compile(options.get(VALUE_REGEX)).matcher("");
+ if (options.containsKey(ENCODING)) {
+ try {
+ this.encoding = options.get(ENCODING);
+ @SuppressWarnings("unused")
+ String test = new String("test".getBytes(), encoding);
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ return false;
+ }
+ }
+
return true;
}
@@ -192,4 +214,19 @@ public class RegExFilter extends Filter
si.addOption(RegExFilter.OR_FIELDS, "true");
}
}
+
+ /**
+ * Set the encoding string to use when interpreting characters
+ *
+ * @param si
+ * ScanIterator config to be updated
+ * @param encoding
+ * the encoding string to use for character interpretation.
+ *
+ */
+ public static void setEncoding(IteratorSetting si, String encoding) {
+ if (!encoding.isEmpty()) {
+ si.addOption(RegExFilter.ENCODING, encoding);
+ }
+ }
}
Modified: incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java?rev=1212517&r1=1212516&r2=1212517&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java Fri Dec 9 16:59:12 2011
@@ -199,5 +199,25 @@ public class RegExFilterTest extends Tes
rei.init(new SortedMapIterator(tm), is.getProperties(), new DefaultIteratorEnvironment());
rei.seek(new Range(), EMPTY_COL_FAMS, false);
rei.deepCopy(new DefaultIteratorEnvironment());
+
+ // -----------------------------------------------------
+ String multiByteText = new String("\u6d67" + "\u6F68" + "\u7067");
+ String multiByteRegex = new String(".*" + "\u6F68" + ".*");
+
+ Key k4 = new Key("boo4".getBytes(), "hoo".getBytes(), "20080203".getBytes(), "".getBytes(), 1l);
+ Value inVal = new Value(multiByteText.getBytes("UTF-8"));
+ tm.put(k4, inVal);
+
+ is.clearOptions();
+
+ RegExFilter.setRegexs(is, null, null, null, multiByteRegex, true);
+ rei.init(new SortedMapIterator(tm), is.getProperties(), new DefaultIteratorEnvironment());
+ rei.seek(new Range(), EMPTY_COL_FAMS, false);
+
+ assertTrue(rei.hasTop());
+ Value outValue = rei.getTopValue();
+ String outVal = new String(outValue.get(), "UTF-8");
+ assertTrue(outVal.equals(multiByteText));
+
}
}